Ran the below commands in Unix shell

cd /data02/Analysis/Projects/2_CPE_Transmission/VennDiagram_of_genes_between_species/CPE_Transmission_Data_Analysis


#---------- Identify plasmids and chromosomes using PlasClass

# Run PlasClass's classify_fasta.py on every assembly in the current directory,
# writing <assembly>.plasclass.probs.out next to it.
# The glob is iterated directly (instead of parsing `ls` output) and "$j" is
# quoted, so file names with spaces or glob characters are passed through intact.
# NOTE(review): -p 60 is presumably the number of worker processes — confirm.
time for j in *.fasta; do echo "$j"; time python3.7 ~/sw/PlasClass/classify_fasta.py -f "$j" -o "$j".plasclass.probs.out -p 60; done
real    464m57.242s
user    592m31.525s
sys 133m29.767s

# Collect the per-assembly PlasClass outputs in their own directory.
mkdir ../1_plasClass_Probs
mv *.plasclass.probs.out ../1_plasClass_Probs
# Concatenate all outputs into one table, prefixing every line with the name of
# the file it came from. This runs inside the output directory because the *.out
# files were just moved there, and so that FILENAME carries no directory part
# (the R step only strips the ".plasclass.probs.out" suffix from it).
# NOTE(review): the R loader reads "1_plasClass_probs/..." (lower-case "p");
# confirm the directory name actually used on disk.
(cd ../1_plasClass_Probs && awk '{print FILENAME "\t" $0}' *.out >CPE_Trans_PlasClass_Predictions_comb.tab)



#---------- Identify Transposons using Bacant (Integron module is not working properly and Resistance genes are used from AMRfinderplus)
# Run BacAnt on every assembly; each run is started as a background job and
# writes to <assembly>_bacant_out.
# The group ends with `wait`, so `time` covers the whole batch and nothing that
# follows (moving or parsing the output directories) can start before every
# job has finished.
time {
for d in *.fasta; do
echo "$d";
bacant -n "$d" -o "$d"_bacant_out &
done
wait
}

mkdir ../2_bacant_annot
mv *_bacant_out ../2_bacant_annot

# Build CPE_Trans_BacAnt_Transposons.tab (4 tab-separated columns) from every
# BacAnt transposon.filter.tsv:
#   1. awk   : emit the input file name plus fields 1 and 2 of each line
#              (blanking $13/$14 does not change the fields that are printed)
#   2. fgrep : drop every line containing 'QUERY'
#              (presumably the per-file header rows — TODO confirm)
#   3. sed   : strip the "_bacant_out/transposon.filter.tsv" suffix from the file name
#   4. sed   : collapse "_circ<digits>" to "_circ", and drop digits that directly
#              follow a "depth_<digits>.<digits>x" token (the "." is an unescaped
#              regex dot, so it matches any character)
#   5. awk   : print col1, col2, col1#col2, col3
awk '{$13=$14=""; print FILENAME "\t" $1 "\t" $2}' *_bacant_out/transposon.filter.tsv | fgrep -v 'QUERY' | sed 's/_bacant_out\/transposon.filter.tsv//g' | sed -e 's/_circ[0-9]*/_circ/g' -e 's/\(depth_[0-9]*.[0-9]*x\)[0-9]*/\1/g' | awk '{print $1 "\t" $2 "\t" $1"#"$2 "\t" $3}' >CPE_Trans_BacAnt_Transposons.tab 

# Build CPE_Trans_BacAnt_Integrons.tab the same way from every integron.filter.tsv:
# prefix each line with its file name, drop lines containing 'QUERY', strip the
# "_bacant_out/integron.filter.tsv" suffix, remove each "<digits>~integron_" plus
# the two characters after it, then print col1, col2, col1#col2, col3.
awk '{print FILENAME "\t" $0}' *_bacant_out/integron.filter.tsv | fgrep -v 'QUERY' | sed 's/_bacant_out\/integron.filter.tsv//g' | sed 's/[0-9]*\~integron_..//g' | awk '{print $1 "\t" $2 "\t" $1"#"$2 "\t" $3}' >CPE_Trans_BacAnt_Integrons.tab

#---------- Identify IS using Prokka (NOTE: I used an updated ISdb (default ISdb for prokka) located in my desktop)
# Build CPE_Trans_Prokka_IS.tab from the Prokka GFF files:
#   1. awk  : prefix every GFF line with its file name
#   2. grep : keep only lines containing 'ISfinder'
#   3. sed  : replace every space with "_" (so later field splitting is not broken by spaces)
#   4. awk  : keep fields 1, 2 and the last field
#   5. sed  : remove everything from "ID=" through "ISfinder:", remove the
#             ";locus_tag..." tail, and remove the literal "html.2020//"
#   6. sed  : rewrite "_prokka_out...gff" in the file name to "_assembly_renamed.fasta"
#   7. awk  : print col1, col2, col1#col2, col3
time awk '{print FILENAME "\t" $0}' *_prokka_out/*.gff | grep 'ISfinder'  | sed 's/ /_/g' | awk '{print $1 "\t" $2 "\t" $NF}' | sed -e 's/ID=.*ISfinder://g' -e 's/;locus_tag.*//g' -e 's/html.2020\/\///g' | sed 's/_prokka_out.*gff/_assembly_renamed.fasta/g' | awk '{print $1 "\t" $2 "\t" $1"#"$2 "\t" $3}' >../../VennDiagram_of_genes_between_species/CPE_Transmission_Data_Analysis/6_Prokka_InsertionSequences/CPE_Trans_Prokka_IS.tab 



#---------- Identify AMR genes using NCBI AMRfinderplus

# NOTE: Ran this earlier, so just copying the results

mkdir ../3_amrfinderplus_results 

#---------- Identify virulence factor genes using Abricate

# Write one abricate (VFDB database) command per assembly into abricate_cmds.sh.
# The glob is iterated directly rather than parsing `ls` output.
for d in *.fasta; do echo "abricate -db vfdb $d >$d.vfdb.tab"; done >abricate_cmds.sh
# Run the commands with GNU parallel in the foreground: the results are moved
# right below, so the batch must be finished (and `time` must cover it) first.
time parallel --jobs 62 < abricate_cmds.sh

mkdir ../4_abricate_vf_results
mv *.vfdb.tab ../4_abricate_vf_results
cd ../4_abricate_vf_results

# Merge all per-assembly VFDB reports: skip any line containing '#', and keep
# the first six whitespace-delimited fields as tab-separated columns.
cat *_assembly_renamed.fasta.vfdb.tab | awk -v OFS='\t' 'index($0, "#") == 0 {print $1, $2, $3, $4, $5, $6}' >CPE_Trans_Abricate_VF_Genes.tab


## Excluding possible contamination samples
-rwxrwxr-x 1 prakki prakki 9.8M Jul  6 14:17 batch12_27072019_ENT1410_assembly_renamed.fasta
-rwxrwxr-x 1 prakki prakki  11M Jul  6 14:17 batch2_21032019_ENT466_assembly_renamed.fasta
-rwxrwxr-x 1 prakki prakki  11M Jul  6 14:17 batch2_21032019_ENT497_assembly_renamed.fasta
-rwxrwxr-x 1 prakki prakki  11M Jul  6 14:18 batch2_21032019_ENT378_assembly_renamed.fasta
-rwxrwxr-x 1 prakki prakki  11M Jul  6 14:18 rbatch2_07082020_ENT792_assembly_renamed.fasta

# These assemblies have abnormal assembly sizes. So excluding them from analysis

## -- Rerunning MLST

# Write one mlst command per assembly into mlst_cmds.sh (iterating the glob
# directly rather than parsing `ls` output), then run them with GNU parallel
# as a background job.
for d in *.fasta; do echo "mlst $d >$d.mlst.tab"; done >mlst_cmds.sh
time parallel --jobs 62 < mlst_cmds.sh &

Ran the below commands in R

library(dplyr)
library(UpSetR)
library(tidyverse)
library(splitstackshape)
library(reshape2)
library(randomcoloR)
library(ComplexHeatmap)
library(data.table)
library(kableExtra)
library(DT)
library(data.table)
library(formattable)




# Every relative path used below is resolved against this analysis directory.
setwd("/data02/Analysis/Projects/2_CPE_Transmission/VennDiagram_of_genes_between_species/CPE_Transmission_Data_Analysis")

# Names of the assemblies kept for the analysis, one per line; used further
# down to filter each annotation table by its Fasta column.
hybridAssemb_1198list <- readLines(
  con = "/data02/Analysis/Projects/2_CPE_Transmission/VennDiagram_of_genes_between_species/CPE_Transmission_Data_Analysis/CPE_Hyrbid_Assemblies_1198.list"
)
#head(hybridAssemb_1198list)

###@@@@@@@@@@@@@@ Dataframe1 - PlasClass Predictions

# Combined PlasClass output: one row per contig with the source file name,
# the contig id and the reported probability (no header row in the file).
plasClass_Prob_df <- data.table::fread(file = "1_plasClass_probs/CPE_Trans_PlasClass_Predictions_comb.tab", sep = "\t", header = FALSE)
#head(plasClass_Prob_df)

colnames(plasClass_Prob_df) <- c("Fasta", "Contig", "Probability")

plasClass_Prob_df <-
  plasClass_Prob_df %>%
  # Strip the PlasClass output suffix to recover the assembly file name.
  # The dots are escaped and the pattern is anchored at the end, so only the
  # literal trailing ".plasclass.probs.out" is removed.
  mutate(Fasta = stringr::str_replace(Fasta, "\\.plasclass\\.probs\\.out$", "")) %>%
  # Join key shared with the annotation tables: "<assembly>#<contig>".
  mutate(Fasta_Contig = paste(Fasta, Contig, sep = "#")) %>%
  # Contigs with probability >= 0.6 are labelled Plasmid, all others Chromosome.
  mutate(Classification = if_else(Probability >= 0.6, "Plasmid", "Chromosome"))

#head(plasClass_Prob_df)

###@@@@@@@@@@@@@@ Dataframe2 - MLST

# MLST results, one row per assembly; fill = TRUE pads rows that have fewer
# fields than the widest row.
mlst_df <- data.table::fread(file = "5_MLST/mlst_log2.19", sep = "\t", header = FALSE, fill = TRUE)
#head(mlst_df) # mlst_df now has 1289 assembly information
names(mlst_df) <- c("Fasta", "Species", "ST", "Gene1", "Gene2", "Gene3", "Gene4", "Gene5", "Gene6", "Gene7")
# Keep only the assemblies on the analysis list.
mlst_df <- filter(mlst_df, Fasta %in% hybridAssemb_1198list) # mlst_df now has only 1198 assembly information

# Number of assemblies per species value; used later as the denominator of
# the per-species percentages.
Species_Counts <- setNames(
  as.data.frame(table(mlst_df$Species)),
  c("Species", "SampleCount")
)

kable_classic(
  kbl(Species_Counts, caption = "Species and Sample Counts"),
  full_width = FALSE,
  html_font = "Cambria"
)
Species and Sample Counts
Species SampleCount
16
cfreundii 75
cronobacter 6
ecloacae 176
ecoli 375
kaerogenes 15
koxytoca 14
kpneumoniae 519
senterica 2
#head(Species_Counts)

#----------------Step1: Combining MLST and plasClass dataframes

# Attach species and ST to every contig (matched on the assembly file name)
# and build the "<Species>#<Classification>" grouping label used by all
# downstream tables and plots.
plasClass_Prob_df <- plasClass_Prob_df %>%
  left_join(mlst_df, by = "Fasta") %>%
  select(Fasta, Contig, Fasta_Contig, Classification, Species, ST) %>%
  mutate(Species_ContigClass = paste(Species, Classification, sep = "#"))

#head(plasClass_Prob_df)

###@@@@@@@@@@@@@@ Dataframe2 - Bacant Predictions Transposons

# BacAnt transposon hits: assembly, contig, "<assembly>#<contig>" key, transposon.
# The shell pipeline that writes this table filters out every line containing
# 'QUERY' (presumably the header rows), and the column names are assigned just
# below, so the first line is data. It is therefore read with header = FALSE;
# with header = TRUE the first hit would be consumed as column names.
bacAnt_df <- data.table::fread(file = "2_bacant_annot/CPE_Trans_BacAnt_Transposons.tab", sep = "\t", header = FALSE)
#head(bacAnt_df)

colnames(bacAnt_df) <- c("Fasta", "Contig", "Fasta_Contig", "Transposon")

#----------------Step2: Combining plasClass dataframe with bacant dataframe for plotting Transposons

# Annotate every hit with the plasmid/chromosome call and species group of its
# contig; hits whose contig has no PlasClass row keep NA in those columns.
bacant_plasClass_df <- left_join(bacAnt_df, plasClass_Prob_df, by = c("Fasta_Contig")) %>%
  select(Fasta.x,Contig.x,Fasta_Contig,Transposon,Classification,Species_ContigClass)

#head(bacant_plasClass_df)

bacant_plasClass_df <- bacant_plasClass_df %>% filter(Fasta.x %in% hybridAssemb_1198list) # bacant_plasClass_df now has only 1198 assembly information

# bacant_plasClass_df %>%
#   select(Transposon,Species_ContigClass) %>%
#   table() %>% # frequency of unique transposons
#   kbl(caption = "Transposons and their counts in Chromosomes and Plasmids across Species") %>%
#   kable_classic(full_width = F, html_font = "Cambria")

# End points of the colour ramp used by the shaded percentage tables.
customGreen0 <- "#DeF7E9"
customRed <- "#ff7f7f"

# Number of distinct, non-missing Species#ContigClass groups among the
# transposon hits, kept as a one-element list so it can be read as `$n`.
# It sizes the alignment vector and the shaded column range of the table below.
bacant_plasClass_df_total_col <- bacant_plasClass_df %>%
  distinct(Fasta.x, Transposon, Species_ContigClass) %>%
  distinct(Species_ContigClass) %>%
  drop_na(Species_ContigClass) %>%
  count() %>%
  c()

Transposons

Table1

Percentages of Samples carrying transposons in Chromosomes and Plasmids across Species

# percentages of transposons (https://clarewest.github.io/blog/post/making-tables-shiny/)
# For each transposon and Species#ContigClass group: the number of distinct
# assemblies carrying it, as a percentage of that species' sample count.
# The result is spread to one column per group, rounded to one decimal and
# rendered as a colour-shaded, paged table.
bacant_plasClass_df %>%
  distinct(Fasta.x, Transposon, Species_ContigClass) %>%
  count(Transposon, Species_ContigClass, name = "Freq") %>%
  drop_na(Species_ContigClass) %>%
  # Split the label into Species / ContigClass while keeping the label itself.
  separate(
    Species_ContigClass,
    into = c("Species", "ContigClass"),
    sep = "#",
    remove = FALSE
  ) %>%
  left_join(Species_Counts, by = "Species") %>%
  mutate(SamplePrcnt_wGeneElement = Freq * 100 / SampleCount) %>%
  select(Transposon, Species_ContigClass, SamplePrcnt_wGeneElement) %>%
  data.table::dcast(
    Transposon ~ Species_ContigClass,
    value.var = "SamplePrcnt_wGeneElement"
  ) %>%
  replace(is.na(.), 0) %>%
  mutate_if(is.numeric, round, digits = 1) %>%
  formattable(
    .,
    align = c("l", rep("c", bacant_plasClass_df_total_col$n)),
    list(
      `Transposon` = formatter(
        "span",
        style = ~ style(color = "grey", font.weight = "bold")
      ),
      # Columns 2 .. n + 1 hold the per-group percentages.
      area(col = (1:bacant_plasClass_df_total_col$n) + 1) ~
        color_tile(customGreen0, customRed)
    )
  ) %>%
  as.datatable(
    escape = FALSE,
    options = list(
      scrollX = FALSE,
      dom = 'lfti',
      lengthMenu = list(
        c(5, 10, 25, 100, -1),
        c('5', '10', '25', '100', 'All')
      ),
      pageLength = 5
    ),
    rownames = FALSE
  )
#head(bacant_plasClass_df)

Table2

Transposons and their unique counts in Chromosomes and Plasmids across Species

# Presence/absence of every transposon in each Species#ContigClass group:
# duplicates are removed first, so each cell of the cross-tabulation is 0 or 1.
bacant_plasClass_df %>%
  distinct(Transposon, Species_ContigClass) %>%
  table() %>%
  as.data.frame.matrix() %>%
  kbl(caption = "Transposons and their unique counts in Chromsomes and Plasmids across Species") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
Transposons and their unique counts in Chromsomes and Plasmids across Species
-#Chromosome -#Plasmid cfreundii#Chromosome cfreundii#Plasmid cronobacter#Plasmid ecloacae#Chromosome ecloacae#Plasmid ecoli#Chromosome ecoli#Plasmid kaerogenes#Chromosome kaerogenes#Plasmid koxytoca#Chromosome koxytoca#Plasmid kpneumoniae#Chromosome kpneumoniae#Plasmid senterica#Plasmid
Tn10|AP005147 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
Tn1000|KX709966 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
Tn1721|X61367 0 1 1 1 0 0 0 1 1 0 0 0 0 0 1 0
Tn2|KX242350 0 1 1 0 0 0 1 1 1 0 0 0 0 0 1 0
Tn2003|AY532647 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0
Tn2006|EF127491 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Tn2007|EF059914 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0
Tn2008|GQ861438 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Tn2012|EU523120 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 0
Tn21 1 0 0 1 0 1 1 1 1 0 0 0 0 1 1 0
Tn21-like 1 1 1 1 0 1 1 1 1 0 0 0 0 1 1 0
Tn21|CP001182 1 1 1 1 0 0 1 1 1 0 1 0 0 1 1 0
Tn2555.3|AY485150.1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
Tn3411|M19532 1 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0
Tn3926|X78059 0 0 1 1 0 1 0 0 1 0 0 0 0 0 1 0
Tn4|HM749966 1 0 1 1 0 0 1 1 1 0 0 0 0 1 1 0
Tn402|KC999035 1 0 0 1 0 1 1 1 1 0 0 0 0 0 1 0
Tn4352|HQ840942 1 0 1 1 0 1 0 1 1 0 0 0 0 1 1 0
Tn4401a|KT378596 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0
Tn4662a 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0
Tn501|Z00027 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0
Tn5036-like|GQ160960 0 0 0 1 0 0 0 0 1 0 0 0 0 1 1 0
Tn5036|Y09025 0 0 1 1 0 0 1 0 1 0 0 0 0 1 1 0
Tn5044/Tn5046 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0
Tn5057|AJ302768 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 0
Tn5060|AJ551280 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
Tn5073|AF461013 0 0 1 1 0 1 1 1 1 0 1 0 1 1 1 0
Tn5074|AF461012 1 1 0 0 0 1 1 1 0 0 0 0 0 0 1 0
Tn5075|AF457211 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
Tn511-3|EU287476 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0
Tn512|EU306744 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
Tn5393|M96392 0 0 0 0 0 1 1 1 0 0 0 0 0 0 1 0
Tn5393c|AF313472 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0
Tn5403|EU287476 0 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1
Tn5718delta|AM261760 1 0 0 0 0 1 1 1 1 0 0 0 0 1 1 0
Tn6018-L|CP001182 0 0 1 1 0 1 0 1 0 0 0 0 0 0 1 0
Tn602|AH000951 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
Tn6022|CP001182 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Tn6029|HQ840942 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
Tn6082|AP005147 0 1 0 0 1 1 1 1 1 0 0 0 0 1 1 0
Tn6196|KC999035 0 1 1 1 0 1 1 1 1 0 1 1 0 1 1 0
Tn6205|CP003505 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 0
Tn6229|KF295829 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0
Tn6238|KJ511462 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0
Tn6292 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0
Tn6934|CP046417.1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
Tn7-like|AM261760 1 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0
Tn7|AP002527 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0
Tn7051|MN915011.1 0 0 0 1 0 0 1 1 1 0 0 0 0 0 1 0
Tn801|AF080442 1 1 1 1 0 0 1 1 1 0 1 0 0 1 1 0
Tn9-like|HQ840942 1 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0
Tn917|FJ208941 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0

Transposons UpsetR

UpsetR Plot Transposons

#----------------Step3: UpsetR Plot Transposons
 
# One set of transposon names per Species#ContigClass group, converted into
# the combination matrix that UpSet() draws.
tn_upsetR_df <- select(bacant_plasClass_df, Transposon, Species_ContigClass)
tn_upsetR_lt <- split(tn_upsetR_df$Transposon, tn_upsetR_df$Species_ContigClass)
tn_comb_mat <- make_comb_mat(tn_upsetR_lt)

# Intersection sizes (columns) and set sizes (rows) of the combination matrix.
col_size <- comb_size(tn_comb_mat)
row_size <- set_size(tn_comb_mat)

ht <- UpSet(
  tn_comb_mat,
  row_title = "Species#SeqClassification",
  column_title = "Transposon Intersection across species",
  pt_size = unit(6, "pt"),
  lwd = unit(2, "pt"),
  top_annotation = upset_top_annotation(
    tn_comb_mat,
    gp = gpar(fill = "#009797"),
    add_numbers = FALSE,
    bar_width = 0.5,
    annotation_name_rot = 90
  ),
  right_annotation = upset_right_annotation(
    tn_comb_mat,
    add_numbers = TRUE,
    gp = gpar(fill = "#009797")
  ),
  show_row_names = TRUE,
  row_names_gp = gpar(fontsize = 9), # font size of the set (row) labels
  width = unit(700, units = "pt"),
  height = unit(10, "cm")
)

# Draw first, then read the row/column orders back from the drawn object.
ht <- draw(ht)
col_od <- column_order(ht)
row_od <- row_order(ht)

# Write each intersection size 2 mm above its bar in the top annotation,
# following the drawn column order.
decorate_annotation("intersection_size", {
  grid.text(
    col_size[col_od],
    seq_along(col_size),
    unit(col_size[col_od], "native") + unit(2, "mm"),
    default.units = "native",
    just = "bottom",
    gp = gpar(fontsize = 8)
  )
})

Table4

Transposons proportions in Chromosome and plasmids by Species

# Per species: how many distinct transposons are seen only on chromosomes,
# only on plasmids, or on both.
bacant_TransposonCounts <- bacant_plasClass_df %>%
  separate(Species_ContigClass, into = c("Species", "ContigClass"), sep = "#") %>%
  distinct(Species, Transposon, ContigClass) %>%
  mutate(Species_Transposon = paste(Species, Transposon, sep = "#")) %>%
  select(Species_Transposon, ContigClass) %>%
  # Cross-tabulate: one row per species#transposon, one 0/1 column per class.
  table() %>%
  as.data.frame.matrix() %>%
  mutate(
    BothChr_AND_Plasmid = if_else(Chromosome == 1 & Plasmid == 1, 1, 0, missing = 0)
  ) %>%
  tibble::rownames_to_column("Species#Transposon") %>%
  separate("Species#Transposon", into = c("Species", "Transposon"), sep = "#") %>%
  group_by(Species) %>%
  summarise(
    Count_Transposon_in_Chr_old = sum(Chromosome),
    Count_Transposon_in_Plasmid_old = sum(Plasmid),
    Count_Transposon_in_Chr_Plasmid = sum(BothChr_AND_Plasmid)
  ) %>%
  # Subtract the shared elements so the three counts do not overlap.
  transmute(
    Species,
    Count_Transposon_in_Chr = Count_Transposon_in_Chr_old - Count_Transposon_in_Chr_Plasmid,
    Count_Transposon_in_Plasmid = Count_Transposon_in_Plasmid_old - Count_Transposon_in_Chr_Plasmid,
    Count_Transposon_in_Chr_Plasmid
  ) %>%
  column_to_rownames(var = "Species")

# Row-wise percentages: each cell divided by the total of its own row.
bacant_TransposonCounts_props <-
  (bacant_TransposonCounts * 100) / rowSums(bacant_TransposonCounts)

kable_classic(
  kbl(
    bacant_TransposonCounts_props,
    caption = "Transposons proportions in Chromsome and plasmids by Species"
  ),
  full_width = FALSE,
  html_font = "Cambria"
)
Transposons proportions in Chromsome and plasmids by Species
Count_Transposon_in_Chr Count_Transposon_in_Plasmid Count_Transposon_in_Chr_Plasmid
39.393939 24.24242 36.36364
cfreundii 15.151515 36.36364 48.48485
cronobacter 0.000000 100.00000 0.00000
ecloacae 12.903226 32.25806 54.83871
ecoli 13.157895 15.78947 71.05263
kaerogenes 0.000000 80.00000 20.00000
koxytoca 12.500000 75.00000 12.50000
kpneumoniae 7.692308 30.76923 61.53846
senterica 0.000000 100.00000 0.00000

Plot2

Transposons Percentages in Chromosome and plasmids by Species

# Stacked horizontal bar chart: for each species, the percentage of its
# transposons found only on chromosomes, only on plasmids, or on both.
bacant_TransposonCounts_props %>%
  rownames_to_column(var = "Species") %>%
  # Long format; id.vars and variable.name are spelled out instead of relying
  # on melt's id guessing and on partial matching of `variable`.
  reshape2::melt(id.vars = "Species", variable.name = "Transposon location", value.name = "Percentage") %>%
  # Blank out zero percentages so empty segments get no bar and no label.
  # na_if() is applied to the column only: calling it on a whole data frame
  # is rejected by dplyr >= 1.1.
  mutate(Percentage = na_if(Percentage, 0)) %>%
  ggplot(aes(x = Species, y=Percentage, fill = `Transposon location`, label = Percentage)) +
  geom_bar(width = 0.4, position = position_stack(), stat = "identity")+
  scale_fill_manual(labels=c('Chromosome', 'Plasmid',"Both"), values = c("#fcb6b1", "#facc96", "#4dbfbc")) +
  geom_text(aes(label=paste0(sprintf("%1.1f", Percentage),"%")), position=position_stack(vjust=0.5)) +
  scale_x_discrete(expand = c(0,0)) +
  ylab('Percentage')+
  xlab('Species')+
  ggtitle('Percentages of Transposons by their location')+
  theme_classic() +
  theme(axis.text.y=element_text(size=16,  vjust = 0, hjust = 1),
        axis.text.x=element_text(size=16, angle=90, vjust = 0, hjust = 1.2),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        legend.position="top") + coord_flip()

Integrons

Table1

Percentages of Samples carrying Integrons in Chromosomes and Plasmids across Species

###@@@@@@@@@@@@@@ Dataframe2 - Bacant Predictions Integrons

# BacAnt integron hits: assembly, contig, "<assembly>#<contig>" key, integron.
# The shell pipeline that writes this table filters out every line containing
# 'QUERY' (presumably the header rows), and the column names are assigned just
# below, so the first line is data. It is therefore read with header = FALSE;
# with header = TRUE the first hit would be consumed as column names.
bacAnt_Integrons_df <- data.table::fread(file = "2_bacant_annot/CPE_Trans_BacAnt_Integrons.tab", sep = "\t", header = FALSE)
#head(bacAnt_Integrons_df)

colnames(bacAnt_Integrons_df) <- c("Fasta", "Contig", "Fasta_Contig", "Integron")

#----------------Step2: Combining plasClass dataframe with bacant dataframe for plotting Integrons

# Annotate every hit with the plasmid/chromosome call and species group of its
# contig; hits whose contig has no PlasClass row keep NA in those columns.
bacAnt_Integrons_plasClass_df <- left_join(bacAnt_Integrons_df, plasClass_Prob_df, by = c("Fasta_Contig")) %>%
  select(Fasta.x,Contig.x,Fasta_Contig,Integron,Classification,Species_ContigClass)

#head(bacAnt_Integrons_plasClass_df)

bacAnt_Integrons_plasClass_df <- bacAnt_Integrons_plasClass_df %>% filter(Fasta.x %in% hybridAssemb_1198list) # bacAnt_Integrons_plasClass_df now has only 1198 assembly information

# bacAnt_Integrons_plasClass_df %>%
#   select(Integron,Species_ContigClass) %>%
#   table() %>% # frequency of unique Integrons
#   kbl(caption = "Integrons and their counts in Chromsomes and Plasmids across Species") %>%
#   kable_classic(full_width = F, html_font = "Cambria")

# Number of distinct Species#ContigClass groups among the integron hits, kept
# as a one-element list so it can be read as `$n`.
# Missing groups are dropped before counting, exactly as the table below drops
# them; otherwise an NA group would inflate `n`, and the alignment vector and
# shaded column range would point one column past the table.
bacAnt_Integrons_plasClass_df_total_col = bacAnt_Integrons_plasClass_df %>%
  select(Fasta.x,Integron,Species_ContigClass) %>%
  unique() %>%
  select(Species_ContigClass) %>%
  unique() %>%
  drop_na(Species_ContigClass) %>%
  count() %>% c()

# percentages of Integrons
# For each integron and Species#ContigClass group: the number of distinct
# assemblies carrying it, as a percentage of that species' sample count,
# spread to one column per group and rendered as a shaded, paged table.
bacAnt_Integrons_plasClass_df %>%
  select(Fasta.x,Integron,Species_ContigClass) %>%
  unique() %>%
  select(Integron,Species_ContigClass) %>%
  count(Integron, Species_ContigClass) %>%
  rename(Freq = n) %>%
  drop_na(Species_ContigClass) %>%
  mutate(Species_ContigClass2=Species_ContigClass) %>%
  separate(Species_ContigClass2, sep = "#", into = c("Species", "ContigClass")) %>%
  left_join(Species_Counts, by = c("Species")) %>%
  mutate(SamplePrcnt_wGeneElement=Freq*100/SampleCount) %>%
  select(Integron,Species_ContigClass,SamplePrcnt_wGeneElement) %>%
  data.table::dcast(Integron~Species_ContigClass, value.var="SamplePrcnt_wGeneElement") %>%
  replace(is.na(.), 0) %>%
  mutate_if(is.numeric, round, digits=1) %>%
  formattable(., align = c("l",rep("c", bacAnt_Integrons_plasClass_df_total_col$n)),
              list(
    # The label column of this table is `Integron`; the formatter is keyed on
    # it (it was previously keyed on `Transposon`, which this table lacks).
    `Integron` = formatter("span", style = ~ style(color = "grey",font.weight = "bold")),
    # Columns 2 .. n + 1 hold the per-group percentages.
    area(col = (1:bacAnt_Integrons_plasClass_df_total_col$n) + 1) ~ color_tile(customGreen0, customRed))) %>%
  as.datatable(escape = FALSE,
               options = list(scrollX = FALSE, dom = 'lfti',
                              lengthMenu = list(c(5, 10, 25, 100, -1), c('5','10','25', '100', 'All')),
                              pageLength = 5),
               rownames = FALSE)

Table2

# Presence/absence of every integron in each Species#ContigClass group:
# duplicates are removed first, so each cell of the cross-tabulation is 0 or 1.
bacAnt_Integrons_plasClass_df %>%
  distinct(Integron, Species_ContigClass) %>%
  table() %>%
  kbl(caption = "Integrons and their unique counts in Chromsomes and Plasmids across Species") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
Integrons and their unique counts in Chromsomes and Plasmids across Species
-#Chromosome -#Plasmid cfreundii#Chromosome cfreundii#Plasmid cronobacter#Plasmid ecloacae#Chromosome ecloacae#Plasmid ecoli#Chromosome ecoli#Plasmid kaerogenes#Plasmid koxytoca#Plasmid kpneumoniae#Chromosome kpneumoniae#Plasmid
In1000|KF921521 0 0 0 0 0 0 0 0 0 0 0 0 1
In1002|KF921555 0 0 0 0 0 0 0 0 1 0 0 0 0
In1003|KF921556 0 0 0 0 0 0 0 1 1 0 0 0 1
In1004|KF921558 0 1 0 0 0 0 0 1 1 0 0 0 1
In1015|KJ420612 0 0 0 0 0 0 1 0 0 0 0 0 1
In104|AY463797 0 0 0 0 0 0 0 0 1 0 0 0 0
In1069|KM589497 0 0 0 1 0 0 1 1 0 0 0 1 1
In1114|KF914309 0 0 0 0 0 0 0 1 1 0 0 0 0
In1133|KM595287 0 0 0 0 0 1 0 0 0 0 0 0 0
In1207|KP901091 0 0 0 0 0 0 0 0 1 0 0 0 0
In1227|KR699626 0 0 0 0 0 0 0 0 0 0 0 0 1
In1239|KT290951 0 1 0 0 0 1 0 0 1 0 0 0 1
In1249|KT316808 0 0 1 0 0 0 0 1 1 0 0 0 1
In1330|MF612148 0 0 0 1 0 0 0 1 1 0 0 1 1
In1368|KY047413 0 0 1 0 0 0 0 0 0 0 0 1 0
In1372|LC224310 0 0 0 0 0 0 0 0 0 0 0 0 1
In141|FJ711659 0 0 0 1 0 0 0 0 1 0 0 0 1
In1411|KY426918 0 0 0 0 0 0 0 1 1 0 0 0 0
In1440|LC318534 1 0 0 0 0 0 0 1 1 0 0 0 0
In1469|LC333379 0 0 0 1 0 0 1 0 0 0 0 1 1
In149|FJ457611 0 0 0 0 0 1 0 0 0 0 0 0 1
In152|HQ386839 0 0 0 0 0 0 0 0 0 0 0 0 1
In153|JF701188 0 0 0 0 0 0 0 0 0 0 0 0 1
In193|DQ647028 0 0 0 0 0 0 1 0 0 0 0 0 1
In2-10|KF188201 1 0 1 0 0 0 0 1 0 0 0 0 0
In2-3|JX867127 0 0 0 0 0 0 0 0 0 0 0 1 0
In2-30|AB234887 0 0 0 0 0 0 0 0 0 0 0 1 0
In2-44|FJ591055 0 0 1 0 0 0 0 1 0 0 0 0 0
In206|FM957884 0 0 0 0 0 0 1 0 0 0 0 0 1
In221|DQ372710 0 0 0 0 0 0 0 0 1 0 0 0 0
In240|EF368053 0 1 0 1 0 0 1 1 0 1 0 0 1
In244|KC543497 0 0 0 0 0 1 0 0 0 0 0 0 0
In261|FN824509 0 0 0 0 0 0 1 1 0 0 0 0 0
In262|FN823039 0 0 0 0 0 0 0 0 0 0 0 0 1
In282|AY522431 0 0 0 0 0 0 0 1 1 0 0 1 0
In29|AF329699 0 0 0 0 0 0 0 0 0 0 0 0 1
In366|AY069972 0 0 0 0 0 0 1 0 0 0 0 0 0
In384|FM877477 0 0 0 0 0 0 0 0 1 0 0 0 0
In404|DQ520934 0 0 0 0 0 0 0 0 1 0 0 0 1
In415|EU934512 0 0 0 0 0 0 1 0 0 0 0 0 0
In440|AF227505 1 0 0 0 0 0 0 0 1 0 0 0 1
In473|FJ854362 1 0 0 0 0 0 0 1 0 0 0 0 0
In49|EU434618 0 0 0 1 0 0 1 0 0 0 0 1 1
In498|AY214164 1 1 1 1 1 1 1 1 1 1 1 1 1
In504|GU060321 0 0 0 0 0 1 0 1 1 0 0 0 1
In510|EU853659 0 0 0 1 0 1 1 1 1 1 0 1 1
In516|DQ323053 0 0 0 0 0 1 0 0 0 0 0 0 0
In538|KC200566 0 0 0 1 0 0 1 0 1 0 0 0 1
In54|AF220757 0 0 0 0 0 0 0 1 1 0 0 0 0
In545|EU259884 0 0 0 0 0 0 0 0 0 0 0 0 1
In546|FJ594766 0 0 0 0 0 0 0 0 0 0 0 0 1
In554|AJ971343 0 0 0 1 0 0 0 0 0 0 0 0 0
In555|EU675686 0 0 0 1 0 0 1 0 1 0 0 0 1
In556|HQ170516 0 0 0 1 0 1 1 0 1 0 0 0 1
In570|AF453998 0 0 1 0 0 1 1 1 1 0 0 0 0
In573|AM237806 0 0 0 0 0 0 0 1 0 0 0 0 0
In578|FN396877 0 0 0 0 0 0 0 0 0 0 0 0 1
In62|FJ594767 0 0 0 0 0 0 0 0 0 0 0 0 1
In653|HQ730120 0 0 0 0 0 0 0 0 1 0 0 0 0
In663|HQ875011 0 0 0 0 0 0 0 0 1 0 0 0 1
In710|EU434603 0 0 0 0 0 0 0 0 0 0 0 0 1
In718|JF729199 0 0 0 0 0 0 0 1 1 0 0 0 1
In73|AF322577 0 0 0 0 0 0 0 0 1 0 0 0 0
In761|AF175203 0 0 0 0 0 0 0 1 0 0 0 0 1
In771|JX131371 0 0 0 0 0 0 1 0 0 0 0 0 0
In784|JQ837988 0 0 0 0 0 0 0 0 1 0 0 0 0
In79|HQ730118 0 0 0 1 0 0 0 0 0 0 0 0 1
In792|JQ823014 0 0 0 0 0 0 0 0 0 0 0 0 1
In800|JX041889 0 0 0 0 0 0 1 0 0 0 0 0 0
In879|JX885645 0 0 1 0 0 0 0 1 1 0 0 0 1
In901|AY127872 0 0 0 0 0 0 0 0 1 0 0 0 0
In970|KF525327 0 0 0 1 0 0 0 0 0 0 0 0 0

Integrons UpsetR

#----------------Step3: UpsetR Plot Integrons

# One set of integron names per Species#ContigClass group, converted into the
# combination matrix that UpSet() draws.
integron_upsetR_df <- bacAnt_Integrons_plasClass_df %>% select(Integron,Species_ContigClass)
#View(integron_upsetR_df)
integron_upsetR_lt <- split(integron_upsetR_df$Integron, integron_upsetR_df$Species_ContigClass)
integron_comb_mat = make_comb_mat(integron_upsetR_lt)

# Intersection sizes (columns) and set sizes (rows) of the combination matrix.
int_col_size = comb_size(integron_comb_mat)
int_row_size = set_size(integron_comb_mat)

integron_ht = UpSet(integron_comb_mat,
           row_title = "Species#SeqClassification", column_title = "Integron Intersection across species",
           pt_size = unit(6, "pt"),
           lwd = unit(2, "pt"),
           top_annotation = upset_top_annotation(integron_comb_mat, gp = gpar(fill = "#009797"), add_numbers = FALSE,
                                                 bar_width = 0.5, annotation_name_rot = 90),
           right_annotation = upset_right_annotation(integron_comb_mat, add_numbers = TRUE, gp = gpar(fill = "#009797")),
           show_row_names = TRUE,
           row_names_gp = gpar(fontsize = 9), #changes font size of "set size" labels
           width = unit(700, units = "pt"), height = unit(10, "cm"))

# Keep the drawn object under its own name and read the orders from it, as is
# done for the transposon plot. Previously the draw() result was stored in `ht`
# and the orders were taken from the not-yet-drawn `integron_ht`.
integron_ht = draw(integron_ht)
# `ht` is still updated so any later code that reads it sees the same object as before.
ht = integron_ht
int_col_od = column_order(integron_ht)
int_row_od = row_order(integron_ht)

# Write each intersection size 2 mm above its bar in the top annotation,
# following the drawn column order.
decorate_annotation("intersection_size", {
  grid.text(int_col_size[int_col_od],
            seq_len(length(int_col_size)),
            unit(int_col_size[int_col_od], "native") + unit(2, "mm"),
            default.units = "native", just = "bottom",
            gp = gpar(fontsize = 8))
})

Table4

Integrons proportions in Chromosome and plasmids by Species

# Per species: how many distinct integrons are seen only on chromosomes,
# only on plasmids, or on both.
bacant_IntegronCounts <- bacAnt_Integrons_plasClass_df %>%
  separate(Species_ContigClass, into = c("Species", "ContigClass"), sep = "#") %>%
  distinct(Species, Integron, ContigClass) %>%
  mutate(Species_Integron = paste(Species, Integron, sep = "#")) %>%
  select(Species_Integron, ContigClass) %>%
  # Cross-tabulate: one row per species#integron, one 0/1 column per class.
  table() %>%
  as.data.frame.matrix() %>%
  mutate(
    BothChr_AND_Plasmid = if_else(Chromosome == 1 & Plasmid == 1, 1, 0, missing = 0)
  ) %>%
  tibble::rownames_to_column("Species#Integron") %>%
  separate("Species#Integron", into = c("Species", "Integron"), sep = "#") %>%
  group_by(Species) %>%
  summarise(
    Count_Integron_in_Chr_old = sum(Chromosome),
    Count_Integron_in_Plasmid_old = sum(Plasmid),
    Count_Integron_in_Chr_Plasmid = sum(BothChr_AND_Plasmid)
  ) %>%
  # Subtract the shared elements so the three counts do not overlap.
  transmute(
    Species,
    Count_Integron_in_Chr = Count_Integron_in_Chr_old - Count_Integron_in_Chr_Plasmid,
    Count_Integron_in_Plasmid = Count_Integron_in_Plasmid_old - Count_Integron_in_Chr_Plasmid,
    Count_Integron_in_Chr_Plasmid
  ) %>%
  column_to_rownames(var = "Species")

# Row-wise percentages: each cell divided by the total of its own row.
bacant_IntegronCounts_props <-
  (bacant_IntegronCounts * 100) / rowSums(bacant_IntegronCounts)

kable_classic(
  kbl(
    bacant_IntegronCounts_props,
    caption = "Integrons proportions in Chromsome and plasmids by Species"
  ),
  full_width = FALSE,
  html_font = "Cambria"
)
Integrons proportions in Chromsome and plasmids by Species
Count_Integron_in_Chr Count_Integron_in_Plasmid Count_Integron_in_Chr_Plasmid
50.000000 37.50000 12.50000
cfreundii 30.000000 65.00000 5.00000
cronobacter 0.000000 100.00000 0.00000
ecloacae 25.000000 58.33333 16.66667
ecoli 20.000000 42.50000 37.50000
kaerogenes 0.000000 100.00000 0.00000
koxytoca 0.000000 100.00000 0.00000
kpneumoniae 9.090909 77.27273 13.63636

Plot2

Integrons Percentages in Chromosome and plasmids by Species

# Stacked horizontal bar chart: for each species, the percentage of its
# integrons found only on chromosomes, only on plasmids, or on both.
bacant_IntegronCounts_props %>%
  rownames_to_column(var = "Species") %>%
  # Long format; id.vars and variable.name are spelled out instead of relying
  # on melt's id guessing and on partial matching of `variable`.
  reshape2::melt(id.vars = "Species", variable.name = "Integron location", value.name = "Percentage") %>%
  # Blank out zero percentages so empty segments get no bar and no label.
  # na_if() is applied to the column only: calling it on a whole data frame
  # is rejected by dplyr >= 1.1.
  mutate(Percentage = na_if(Percentage, 0)) %>%
  ggplot(aes(x = Species, y=Percentage, fill = `Integron location`, label = Percentage)) +
  geom_bar(width = 0.4, position = position_stack(), stat = "identity")+
  scale_fill_manual(labels=c('Chromosome', 'Plasmid',"Both"), values = c("#fcb6b1", "#facc96", "#4dbfbc")) +
  geom_text(aes(label=paste0(sprintf("%1.1f", Percentage),"%")), position=position_stack(vjust=0.5)) +
  scale_x_discrete(expand = c(0,0)) +
  ylab('Percentage')+
  xlab('Species')+
  ggtitle('Percentages of Integrons by their location')+
  theme_classic() +
  theme(axis.text.y=element_text(size=16,  vjust = 0, hjust = 1),
        axis.text.x=element_text(size=16, angle=90, vjust = 0, hjust = 1.2),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        legend.position="top") + coord_flip()

AMR Genes

Table1

Percentages of Samples carrying AMR genes in Chromosomes and Plasmids across Species

###@@@@@@@@@@@@@@ Dataframe3 - AMRfinderplus - AMR Genes

# AMRFinderPlus hits: assembly, contig, gene, gene class, coverage, identity.
# NOTE(review): header = TRUE assumes the file starts with exactly one header
# row — confirm against how this table was exported.
amrFinderplus_df <- data.table::fread(file = "3_amrfinderplus_results/CPE_Trans_AMRFinderplus_AMR_Genes.tab", sep = "\t", header = TRUE)
#head(amrFinderplus_df)

colnames(amrFinderplus_df) <- c("Fasta", "Contig", "AMR_GENE", "AMR_GENE_CLASS","Coverage","Identity")

amrFinderplus_df <- amrFinderplus_df %>% filter(Fasta %in% hybridAssemb_1198list) # amrFinderplus_df now has only the listed assemblies

# Join key shared with the PlasClass table: "<assembly>#<contig>".
amrFinderplus_df <-
  amrFinderplus_df %>%
  mutate(Fasta_Contig=paste(Fasta, Contig, sep = '#'))

#----------------Step2: Combining plasClass dataframe with AMRFinderplus dataframe for plotting AMR genes

# Annotate every hit with the plasmid/chromosome call and species group of its
# contig; hits whose contig has no PlasClass row keep NA in those columns.
amrFinderplus_df <- left_join(amrFinderplus_df, plasClass_Prob_df, by = c("Fasta_Contig")) %>%
  select(Fasta.x,Contig.x,AMR_GENE,AMR_GENE_CLASS,Classification,Species_ContigClass)

# Number of distinct Species#ContigClass groups among the AMR hits, kept as a
# one-element list so it can be read as `$n`.
# Missing groups are dropped before counting, exactly as the percentage table
# drops them; otherwise an NA group would inflate `n`, and the table's
# alignment vector and shaded column range would be one column too wide.
amrFinderplus_df_total_col = amrFinderplus_df %>%
  select(Fasta.x,AMR_GENE,Species_ContigClass) %>%
  unique() %>%
  select(Species_ContigClass) %>%
  unique() %>%
  drop_na(Species_ContigClass) %>%
  count() %>% c()

# Percentage of samples, per species, that carry each AMR gene on a chromosome
# or on a plasmid, shown as an interactive colour-tiled table.
amrFinderplus_df %>% 
  select(Fasta.x, AMR_GENE, Species_ContigClass) %>%
  unique() %>%                                    # one row per assembly / gene / group
  count(AMR_GENE, Species_ContigClass) %>%        # number of assemblies per gene and group
  rename(Freq = n) %>%  
  drop_na(Species_ContigClass) %>% 
  # Split a copy of the group label so the species can be matched to its
  # sample count while the original label is kept for the column headers.
  mutate(Species_ContigClass2 = Species_ContigClass) %>% 
  separate(Species_ContigClass2, sep = "#", into = c("Species", "ContigClass")) %>% 
  left_join(Species_Counts, by = c("Species")) %>% 
  mutate(SamplePrcnt_wGeneElement = Freq * 100 / SampleCount) %>% 
  select(AMR_GENE, Species_ContigClass, SamplePrcnt_wGeneElement) %>% 
  # Wide layout: one row per gene, one column per Species#ContigClass group.
  data.table::dcast(AMR_GENE ~ Species_ContigClass, value.var = "SamplePrcnt_wGeneElement") %>% 
  replace(is.na(.), 0) %>% 
  mutate_if(is.numeric, round, digits = 1) %>% 
  #kbl(caption = "Sample count percentages for the AMR genes in Chromsomes and Plasmids across Species") %>% 
  #kable_classic(full_width = F, html_font = "Cambria")
  formattable(., align = c("l", rep("c", amrFinderplus_df_total_col$n)), 
              list(
    # Style the gene-name column. The formatter previously named a `Transposon`
    # column, which does not exist in this table.
    `AMR_GENE` = formatter("span", style = ~ style(color = "grey", font.weight = "bold")), 
    # Colour-tile every value column, i.e. columns 2 .. n + 1 (same range as
    # the former `1:n+1`, written without relying on operator precedence).
    area(col = 2:(amrFinderplus_df_total_col$n + 1)) ~ color_tile(customGreen0, customRed))) %>% 
  as.datatable(escape = FALSE,
               options = list(scrollX = FALSE, dom = 'lfti',
                              lengthMenu = list(c(5, 10, 25, 100, -1), c('5', '10', '25', '100', 'All')),
                              pageLength = 5),
               rownames = FALSE)

Table2

### AMR genes and their unique counts in chromosomes and plasmids across species

# Presence/absence table of AMR genes: each distinct gene / Species#ContigClass
# pair is counted once, so a cell is 1 when the gene was seen in that group
# and 0 otherwise.
amrFinderplus_df %>%
  distinct(AMR_GENE, Species_ContigClass) %>%
  table() %>%
  kbl(
    caption = "AMR Genes and their unique counts in Chromsomes and Plasmids across Species"
  ) %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
AMR Genes and their unique counts in Chromsomes and Plasmids across Species
-#Chromosome -#Plasmid cfreundii#Chromosome cfreundii#Plasmid cronobacter#Chromosome cronobacter#Plasmid ecloacae#Chromosome ecloacae#Plasmid ecoli#Chromosome ecoli#Plasmid kaerogenes#Chromosome kaerogenes#Plasmid koxytoca#Chromosome koxytoca#Plasmid kpneumoniae#Chromosome kpneumoniae#Plasmid senterica#Plasmid
aac(3)-IId 1 1 1 1 0 1 1 1 1 1 0 0 0 1 0 1 0
aac(3)-IIe 0 1 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0
aac(3)-IIg 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
aac(3)-IVa 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 1 0
aac(6’)-Ib 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0
aac(6’)-Ib-cr5 0 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0
aac(6’)-Ib’ 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
aac(6’)-Ib3 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0
aac(6’)-Ib4 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0
aac(6’)-If 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
aac(6’)-IIc 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
aac(6’)-Il 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
aacA34 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
aadA1 1 1 1 1 0 0 1 1 1 1 0 1 0 0 1 1 0
aadA16 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0
aadA2 1 1 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0
aadA22 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
aadA5 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0
aadA8 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
ampC 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
ant(2’’)-Ia 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0 1 0
aph(3’’)-Ib 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 0
aph(3’)-Ia 1 0 0 1 0 0 1 0 1 1 0 0 0 0 1 1 0
aph(3’)-IIa 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
aph(3’)-VI 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0
aph(3’)-XV 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0
aph(4)-Ia 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 1 0
aph(6)-Id 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 0
armA 0 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0
arr 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
arr-2 0 0 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0
arr-3 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 1 0
blaACT-1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-16 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-17 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-24 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-25 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-28 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-3 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-40 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-45 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0
blaACT-49 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-51 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-52 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-53 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-64 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-65 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-69 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-74 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaACT-90 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaCARB-2 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0
blaCMH-4 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaCMY-109 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCMY-148 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
blaCMY-152 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCMY-159 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCMY-2 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
blaCMY-35 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCMY-39 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCMY-4 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 1 0
blaCMY-42 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
blaCMY-48 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCMY-6 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
blaCMY-65 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCMY-66 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCMY-75 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCMY-84 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaCTX-M-14 1 0 1 0 0 0 1 0 1 1 0 0 0 0 0 1 0
blaCTX-M-15 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0
blaCTX-M-27 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
blaCTX-M-3 1 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0
blaCTX-M-55 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0
blaCTX-M-65 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
blaCTX-M-8 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
blaCTX-M-9 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
blaDHA-1 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 1 0
blaDHA-15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
blaDHA-17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaDHA-21 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaDHA-4 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaEC-5 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
blaGES-9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
blaIMI-1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaIMP-1 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0
blaIMP-26 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
blaIMP-4 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0
blaKPC-2 0 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0
blaKPC-6 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
blaLAP-1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
blaLAP-2 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 0
blaLEN-17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaMAL-1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
blaMAL-2 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
blaMIR-20 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaMIR-5 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
blaNDM-1 0 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 1
blaNDM-4 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
blaNDM-5 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0
blaNDM-7 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0
blaNDM-9 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
blaOKP-A-11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-A-17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-A-9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-B 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-B-17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-B-2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-B-22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-B-34 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-B-6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-B-7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOKP-B-8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaOXA-1 1 1 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0
blaOXA-10 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0
blaOXA-181 0 0 1 0 0 0 0 1 0 1 0 0 0 0 1 1 0
blaOXA-2 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
blaOXA-21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
blaOXA-23 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
blaOXA-232 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0
blaOXA-48 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 1 0
blaOXA-9 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0
blaOXY-1-1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
blaOXY-1-2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
blaOXY-2-1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
blaPER-7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
blaSFO-1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0
blaSHV-1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0
blaSHV-108 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-11 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0
blaSHV-12 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0
blaSHV-144 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-186 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-187 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-26 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-27 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-2A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
blaSHV-32 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-33 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-41 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-5 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0
blaSHV-52 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-60 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-62 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-71 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-75 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-76 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaSHV-77 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
blaTEM-1 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0
blaTEM-116 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0
blaTEM-135 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
blaTEM-169 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
blaTEM-176 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
blaTEM-190 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
blaTEM-2 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
blaTEM-215 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
blaTEM-30 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
blaTEM-31 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
blaTEM-32 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
blaVIM-4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
ble 0 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 1
catA1 1 0 1 0 0 0 1 1 1 1 0 1 0 0 0 1 0
catA2 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0 1 0
catB 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 0
catB3 0 1 0 1 0 0 0 1 1 1 0 0 0 0 1 1 0
catB8 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
cmlA1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
cmlA5 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 0
dfrA1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 1 1 0
dfrA12 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0
dfrA14 0 1 0 1 0 0 1 1 1 1 0 1 0 1 1 1 0
dfrA15 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0
dfrA16 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0
dfrA17 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0
dfrA23 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
dfrA27 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0
dfrA29 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
dfrA32 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0
dfrA33 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0
dfrA5 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
dfrA7 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
dfrB4 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
ere(A) 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0
erm(42) 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
floR 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0
fosA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
fosA10 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0
fosA3 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
fosA4 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
fosA7.5 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
lnu(F) 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
mcr-1.1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0
mcr-10.1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 0
mcr-3.1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
mcr-4.3 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
mef(B) 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
mph(A) 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 0
mph(E) 0 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0
msr(E) 0 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0
oqxA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxA10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxA11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxB 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxB14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxB19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxB20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxB22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxB25 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxB32 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxB5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
oqxB6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
qacEdelta1 1 1 1 1 0 1 1 1 1 1 0 1 0 0 1 1 0
qacG2 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0
qacL 0 1 1 1 0 0 0 1 1 1 0 0 0 0 0 1 0
qepA1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
qepA4 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
qepA8 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
qnrA1 0 0 0 1 0 0 0 1 1 1 0 0 0 0 0 1 0
qnrB1 0 1 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0
qnrB12 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
qnrB17 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
qnrB2 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0
qnrB28 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
qnrB35 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
qnrB38 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
qnrB4 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 1 0
qnrB6 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 1 0
qnrB69 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
qnrB7 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0
qnrB9 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0
qnrS1 0 1 1 1 0 0 0 1 1 1 0 1 0 0 0 1 0
qnrS2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
rmtB1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0
rmtC 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0
rmtF1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 0
rmtF2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
sat2 1 0 1 1 0 0 1 0 1 0 0 0 0 0 1 0 0
smr 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
sul1 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0
sul2 1 1 0 1 0 0 1 1 1 1 0 1 0 1 1 1 0
sul3 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
tet(A) 1 1 0 1 0 0 1 1 1 1 0 1 0 1 1 1 0
tet(B) 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
tet(C) 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0
tet(D) 0 0 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0
tet(G) 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
tet(M) 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
tet(X4) 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0

AMR Genes UpsetR

#----------------Step3: UpsetR Plot AMR Genes
# Gene / group pairs that feed the UpSet plot.
amr_upsetR_df <- select(amrFinderplus_df, AMR_GENE, Species_ContigClass)

# One vector of genes per Species#ContigClass group, then the combination matrix.
amr_upsetR_lt <- split(
  amr_upsetR_df[["AMR_GENE"]],
  amr_upsetR_df[["Species_ContigClass"]]
)
amr_comb_mat <- make_comb_mat(amr_upsetR_lt)

# Sizes of the intersections (columns) and of the individual sets (rows).
col_size <- comb_size(amr_comb_mat)
row_size <- set_size(amr_comb_mat)

amr_ht <- UpSet(
  amr_comb_mat,
  row_title = "Species#SeqClassification",
  column_title = "AMR Genes Intersection across species",
  pt_size = unit(6, "pt"),
  lwd = unit(2, "pt"),
  # Colouring options that were tried and left switched off:
  #comb_col = "red",
  # bg_col = c("#d6b23b","#81dce2","#acf49a","#a784d8","#acf49a","#81dce2","#1a51dd","#1d7a01","#683cd8","#d62c4c","#683cd8","#1d7a01" ,
  #            "#dd71c0","#1a51dd","#dd71c0","#d62c4c"),
  #bg_col = upset_row_col$Color,
  top_annotation = upset_top_annotation(
    amr_comb_mat,
    gp = gpar(fill = "#009797"),
    add_numbers = FALSE,
    bar_width = 0.5,
    annotation_name_rot = 90
  ),
  right_annotation = upset_right_annotation(
    amr_comb_mat,
    add_numbers = TRUE,
    gp = gpar(fill = "#009797")
  ),
  show_row_names = TRUE,
  row_names_gp = gpar(fontsize = 9), # font size of the set (row) labels
  width = unit(900, units = "pt"),
  height = unit(12, "cm")
)

# Draw the plot, then read back the column/row order of the drawn object.
amr_ht <- draw(amr_ht)
col_od <- column_order(amr_ht)
row_od <- row_order(amr_ht)

# The top annotation is built with add_numbers = FALSE, so write each
# intersection size just above its bar by hand.
decorate_annotation("intersection_size", {
  grid.text(
    col_size[col_od],
    x = seq_along(col_size),
    y = unit(col_size[col_od], "native") + unit(2, "mm"),
    default.units = "native",
    just = "bottom",
    gp = gpar(fontsize = 8)
  )
})

Table3

AMR gene counts in chromosomes and plasmids by species

# Per species: number of distinct AMR genes seen only on chromosomes, only on
# plasmids, or on both, plus the total number of distinct genes.
amrFinderplus_df %>% 
  # Hits without a Species_ContigClass (no match in the plasClass join) are
  # dropped first, as in the sample-percentage table. Otherwise they survive
  # as "NA#<gene>" labels and add a phantom "NA" species row of zeros.
  drop_na(Species_ContigClass) %>% 
  separate(Species_ContigClass, sep = "#", into = c("Species", "ContigClass")) %>% 
  select(Species, AMR_GENE, ContigClass) %>% 
  unique() %>% 
  mutate(Species_AMR_GENE = paste(Species, AMR_GENE, sep = '#')) %>% 
  select(Species_AMR_GENE, ContigClass) %>% 
  # 0/1 presence matrix: one row per species#gene, one column per contig class.
  table() %>% as.data.frame.matrix() %>% 
  mutate(BothChr_AND_Plasmid = case_when(Chromosome == 1 & Plasmid == 1 ~ 1, TRUE ~ 0)) %>% 
  tibble::rownames_to_column("Species#AMRGEnes") %>% 
  separate("Species#AMRGEnes", sep = "#", into = c("Species", "AMRGene")) %>% 
  group_by(Species) %>% 
  summarise(Count_AMRgenes_in_Chr_old = sum(Chromosome),
            Count_AMRgenes_in_Plasmid_old = sum(Plasmid),
            Count_AMRgenes_in_Chr_Plasmid = sum(BothChr_AND_Plasmid)) %>% 
  # Remove the genes present in both locations from the per-location counts so
  # the three categories do not overlap.
  mutate(Count_AMRgenes_in_Chr = Count_AMRgenes_in_Chr_old - Count_AMRgenes_in_Chr_Plasmid, 
         Count_AMRgenes_in_Plasmid = Count_AMRgenes_in_Plasmid_old - Count_AMRgenes_in_Chr_Plasmid) %>% 
  select(Species, Count_AMRgenes_in_Chr, Count_AMRgenes_in_Plasmid, Count_AMRgenes_in_Chr_Plasmid) %>% 
  mutate(Total_AMRGenes = Count_AMRgenes_in_Chr + Count_AMRgenes_in_Plasmid +
           Count_AMRgenes_in_Chr_Plasmid) %>% 
  kbl(caption = "AMR Genes counts in Chromsome and plasmids by Species") %>% 
  kable_classic(full_width = FALSE, html_font = "Cambria")
AMR Genes counts in Chromsome and plasmids by Species
Species Count_AMRgenes_in_Chr Count_AMRgenes_in_Plasmid Count_AMRgenes_in_Chr_Plasmid Total_AMRGenes
17 27 16 60
cfreundii 24 46 17 87
cronobacter 2 20 0 22
ecloacae 30 44 30 104
ecoli 11 57 53 121
kaerogenes 1 39 0 40
koxytoca 3 20 0 23
kpneumoniae 46 74 41 161
senterica 0 2 0 2

Table4

AMR gene proportions in chromosomes and plasmids by species

# Per species: number of distinct AMR genes seen only on chromosomes, only on
# plasmids, or on both. Species become row names so the result is all-numeric.
amr_geneCounts <- amrFinderplus_df %>% 
  # Hits without a Species_ContigClass are dropped first. Otherwise they
  # survive as "NA#<gene>" labels and create an all-zero "NA" species row,
  # which turns into NaN in the percentage table below.
  drop_na(Species_ContigClass) %>% 
  separate(Species_ContigClass, sep = "#", into = c("Species", "ContigClass")) %>% 
  select(Species, AMR_GENE, ContigClass) %>% 
  unique() %>% 
  mutate(Species_AMR_GENE = paste(Species, AMR_GENE, sep = '#')) %>% 
  select(Species_AMR_GENE, ContigClass) %>% 
  # 0/1 presence matrix: one row per species#gene, one column per contig class.
  table() %>% as.data.frame.matrix() %>% 
  mutate(BothChr_AND_Plasmid = case_when(Chromosome == 1 & Plasmid == 1 ~ 1, TRUE ~ 0)) %>% 
  tibble::rownames_to_column("Species#AMRGEnes") %>% 
  separate("Species#AMRGEnes", sep = "#", into = c("Species", "AMRGene")) %>% 
  group_by(Species) %>% 
  summarise(Count_AMRgenes_in_Chr_old = sum(Chromosome),
            Count_AMRgenes_in_Plasmid_old = sum(Plasmid),
            Count_AMRgenes_in_Chr_Plasmid = sum(BothChr_AND_Plasmid)) %>% 
  # Remove the genes present in both locations from the per-location counts so
  # the three categories do not overlap.
  mutate(Count_AMRgenes_in_Chr = Count_AMRgenes_in_Chr_old - Count_AMRgenes_in_Chr_Plasmid, 
         Count_AMRgenes_in_Plasmid = Count_AMRgenes_in_Plasmid_old - Count_AMRgenes_in_Chr_Plasmid) %>% 
  select(Species, Count_AMRgenes_in_Chr, Count_AMRgenes_in_Plasmid, Count_AMRgenes_in_Chr_Plasmid) %>% 
  column_to_rownames(var = "Species")

# divides each cell value with corresponding row sum value
amr_geneCounts_props <- amr_geneCounts * 100 / rowSums(amr_geneCounts) 

amr_geneCounts_props %>% 
  kbl(caption = "AMR Genes proportions in Chromsome and plasmids by Species") %>% 
  kable_classic(full_width = FALSE, html_font = "Cambria")
AMR Genes proportions in Chromsome and plasmids by Species
Count_AMRgenes_in_Chr Count_AMRgenes_in_Plasmid Count_AMRgenes_in_Chr_Plasmid
28.333333 45.00000 26.66667
cfreundii 27.586207 52.87356 19.54023
cronobacter 9.090909 90.90909 0.00000
ecloacae 28.846154 42.30769 28.84615
ecoli 9.090909 47.10744 43.80165
kaerogenes 2.500000 97.50000 0.00000
koxytoca 13.043478 86.95652 0.00000
kpneumoniae 28.571429 45.96273 25.46584
senterica 0.000000 100.00000 0.00000

Plot2

AMR gene percentages in chromosomes and plasmids by species

# Horizontal stacked bar chart: for each species, the percentage of AMR genes
# found only on chromosomes, only on plasmids, or on both.
amr_geneCounts_props %>% 
  rownames_to_column(var = "Species") %>% 
  # Long format, one row per (Species, location) pair. The argument names are
  # spelled out in full (the original `variable =` only worked through partial
  # matching of `variable.name`) and the id column is named explicitly.
  reshape2::melt(id.vars = "Species",
                 variable.name = "AMR gene location",
                 value.name = "Percentage") %>% 
  # Turn 0% segments into NA so they get no "0.0%" label. This is done on the
  # Percentage column only: calling na_if() on the whole data frame is an error
  # in dplyr >= 1.1, where `y` must be castable to the type of `x`.
  mutate(Percentage = na_if(Percentage, 0)) %>% 
  ggplot(aes(x = Species, y = Percentage, fill = `AMR gene location`)) + 
  geom_col(width = 0.4, position = position_stack()) +
  scale_fill_manual(labels = c('Chromosome', 'Plasmid', "Both"),
                    values = c("#fcb6b1", "#facc96", "#4dbfbc")) +
  # Label each segment with its percentage, centred inside the segment.
  geom_text(aes(label = paste0(sprintf("%1.1f", Percentage), "%")),
            position = position_stack(vjust = 0.5)) +
  scale_x_discrete(expand = c(0, 0)) +
  ylab('Percentage') +
  xlab('Species') +
  ggtitle('Percentages of AMR genes by their location') +
  theme_classic() +
  theme(axis.text.y = element_text(size = 16, vjust = 0, hjust = 1),
        axis.text.x = element_text(size = 16, angle = 90, vjust = 0, hjust = 1.2),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "top") +
  coord_flip()

VF Genes

Table1

Percentages of samples carrying virulence genes in chromosomes and plasmids across species

###@@@@@@@@@@@@@@ Dataframe4 - Abricate - Virulence Factor Genes

# Read the combined Abricate virulence-factor table (tab-separated, with header).
Abricate_vf_df <- data.table::fread(
  file = "4_abricate_vf_results/CPE_Trans_Abricate_VF_Genes.tab",
  sep = "\t",
  header = TRUE
)
#head(Abricate_vf_df)

##########-------------- Modifying dataframe START -------------------################

colnames(Abricate_vf_df) <- c("Fasta", "Contig", "START", "END", "STRAND", "VF_GENE")

# Keep only the assemblies listed in hybridAssemb_1198list and add the
# "<Fasta>#<Contig>" key used to join with the plasClass predictions.
Abricate_vf_df <- Abricate_vf_df %>%
  filter(Fasta %in% hybridAssemb_1198list) %>%
  mutate(Fasta_Contig = paste(Fasta, Contig, sep = "#"))

#head(Abricate_vf_df)

#----------------Step2: Combining plasClass dataframe with abricate_vf_dataframe for plotting virulence genes

# Left join: hits whose contig has no row in plasClass_Prob_df are kept, with
# NA in Classification and Species_ContigClass.
Abricate_vf_df <- Abricate_vf_df %>%
  left_join(plasClass_Prob_df, by = "Fasta_Contig") %>%
  select(Fasta.x, Contig.x, VF_GENE, Classification, Species_ContigClass)

#View(Abricate_vf_df)

# Number of distinct, non-missing Species_ContigClass groups, kept as
# list(n = <count>) so it can be read as Abricate_vf_df_total_col$n.
Abricate_vf_df_total_col <- Abricate_vf_df %>%
  select(Species_ContigClass) %>%
  unique() %>%
  drop_na(Species_ContigClass) %>%
  count() %>%
  c()

# Percentage of samples, per species, that carry each virulence-factor gene on
# a chromosome or on a plasmid, shown as an interactive colour-tiled table.
Abricate_vf_df %>% 
  select(Fasta.x, VF_GENE, Species_ContigClass) %>%
  unique() %>%                                   # one row per assembly / gene / group
  count(VF_GENE, Species_ContigClass) %>%        # number of assemblies per gene and group
  rename(Freq = n) %>%  
  drop_na(Species_ContigClass) %>% 
  # Split a copy of the group label so the species can be matched to its
  # sample count while the original label is kept for the column headers.
  mutate(Species_ContigClass2 = Species_ContigClass) %>% 
  separate(Species_ContigClass2, sep = "#", into = c("Species", "ContigClass")) %>% 
  left_join(Species_Counts, by = c("Species")) %>% 
  mutate(SamplePrcnt_wGeneElement = Freq * 100 / SampleCount) %>% 
  select(VF_GENE, Species_ContigClass, SamplePrcnt_wGeneElement) %>% 
  # Wide layout: one row per gene, one column per Species#ContigClass group.
  data.table::dcast(VF_GENE ~ Species_ContigClass, value.var = "SamplePrcnt_wGeneElement") %>% 
  replace(is.na(.), 0) %>% 
  mutate_if(is.numeric, round, digits = 1) %>% 
 # kbl(caption = "Sample count percentages for the Virulence factor genes in Chromsomes and Plasmids across Species") %>% 
 # kable_classic(full_width = F, html_font = "Cambria")
  formattable(., align = c("l", rep("c", Abricate_vf_df_total_col$n)), 
              list(
    # Style the gene-name column. The formatter previously named a `Transposon`
    # column, which does not exist in this table.
    `VF_GENE` = formatter("span", style = ~ style(color = "grey", font.weight = "bold")), 
    # Colour-tile every value column, i.e. columns 2 .. n + 1 (same range as
    # the former `1:n+1`, written without relying on operator precedence).
    area(col = 2:(Abricate_vf_df_total_col$n + 1)) ~ color_tile(customGreen0, customRed))) %>% 
  as.datatable(escape = FALSE,
               options = list(scrollX = FALSE, dom = 'lfti',
                              lengthMenu = list(c(5, 10, 25, 100, -1), c('5', '10', '25', '100', 'All')),
                              pageLength = 5),
               rownames = FALSE)

Table2

Virulence genes and their unique counts in chromosomes and plasmids across species

# Presence/absence table of virulence-factor genes: each distinct gene /
# Species#ContigClass pair is counted once, so a cell is 1 when the gene was
# seen in that group and 0 otherwise.
Abricate_vf_df %>%
  distinct(VF_GENE, Species_ContigClass) %>%
  table() %>%
  kbl(
    caption = "Virulence factors genes and their unique counts in Chromsomes and Plasmids across Species"
  ) %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
Virulence factors genes and their unique counts in Chromsomes and Plasmids across Species
-#Chromosome cfreundii#Chromosome cfreundii#Plasmid cronobacter#Chromosome ecloacae#Chromosome ecloacae#Plasmid ecoli#Chromosome ecoli#Plasmid kaerogenes#Chromosome koxytoca#Chromosome kpneumoniae#Chromosome kpneumoniae#Plasmid senterica#Chromosome
aap/aspU 0 0 0 0 0 0 0 1 0 0 0 0 0
afaA 0 0 0 0 0 0 1 0 0 0 0 0 0
afaB-I 0 0 0 0 0 0 1 0 0 0 0 0 0
afaC-I 0 0 0 0 0 0 1 0 0 0 0 0 0
afaD 0 0 0 0 0 0 1 0 0 0 0 0 0
afaE-I 0 0 0 0 0 0 1 0 0 0 0 0 0
aslA 1 1 0 0 0 0 1 0 0 0 1 0 0
astA 0 1 0 0 0 1 1 1 0 1 1 1 0
cesAB 0 0 0 0 0 0 1 0 0 0 0 0 0
cesD 0 0 0 0 0 0 1 0 0 0 0 0 0
cesD2 0 0 0 0 0 0 1 0 0 0 0 0 0
cesL 0 0 0 0 0 0 1 0 0 0 0 0 0
cesT 0 0 0 0 0 0 1 0 0 0 0 0 0
cheY 1 1 0 0 0 0 0 0 0 0 0 0 0
chuA 0 0 0 1 0 0 1 0 0 0 0 0 1
chuS 0 1 0 0 0 0 1 0 0 0 0 0 0
chuT 0 1 0 0 0 0 1 0 0 0 0 0 0
chuU 0 1 0 0 0 0 1 0 0 0 0 0 0
chuV 0 1 0 0 0 0 1 0 0 0 0 0 0
chuW 0 1 0 0 0 0 1 0 0 0 0 0 0
chuX 0 1 0 1 0 0 1 0 0 0 0 0 1
chuY 0 1 0 0 0 0 1 0 0 0 0 0 0
cnf1 0 0 0 0 0 0 1 1 0 0 0 0 0
csgA 1 1 0 0 0 0 0 0 0 0 0 0 0
csgB 1 1 0 1 0 0 1 0 0 0 0 0 1
csgD 1 1 0 1 1 0 1 0 0 0 0 0 1
csgE 1 1 0 1 0 0 0 0 0 0 0 0 1
csgF 1 1 0 1 0 0 1 0 0 0 0 0 1
csgG 1 1 0 1 1 0 1 0 0 0 0 0 1
csnA 0 0 0 0 0 0 1 0 0 0 0 0 0
daaD 0 0 0 0 0 0 1 0 0 0 0 0 0
daaE 0 0 0 0 0 0 1 0 0 0 0 0 0
daaF 0 0 0 0 0 0 1 0 0 0 0 0 0
draA 0 0 0 0 0 0 1 0 0 0 0 0 0
draD 0 0 0 0 0 0 1 0 0 0 0 0 0
draE2 0 0 0 0 0 0 1 0 0 0 0 0 0
draP 0 0 0 0 0 0 1 0 0 0 0 0 0
eae 0 0 0 0 0 0 1 0 0 0 0 0 0
east1 0 0 0 1 0 0 0 0 0 0 0 0 0
entA 0 1 0 1 1 0 1 0 1 1 1 0 1
entB 1 1 0 1 1 0 1 0 1 1 1 0 1
entC 1 1 0 0 0 0 1 0 0 0 0 0 0
entD 0 1 0 0 0 0 1 0 0 0 0 0 0
entE 1 1 0 1 0 0 1 0 0 0 0 0 1
entF 0 1 0 0 0 0 1 0 0 0 0 0 0
entS 1 1 0 1 0 0 1 0 0 0 0 0 1
escC 0 0 0 0 0 0 1 0 0 0 0 0 0
escD 0 0 0 0 0 0 1 0 0 0 0 0 0
escE 0 0 0 0 0 0 1 0 0 0 0 0 0
escF 0 0 0 0 0 0 1 0 0 0 0 0 0
escG 0 0 0 0 0 0 1 0 0 0 0 0 0
escI 0 0 0 0 0 0 1 0 0 0 0 0 0
escJ 0 0 0 0 0 0 1 0 0 0 0 0 0
escL 0 0 0 0 0 0 1 0 0 0 0 0 0
escN 0 0 0 0 0 0 1 0 0 0 0 0 0
escO 0 0 0 0 0 0 1 0 0 0 0 0 0
escP 0 0 0 0 0 0 1 0 0 0 0 0 0
escR 0 0 0 0 0 0 1 0 0 0 0 0 0
escS 0 0 0 0 0 0 1 0 0 0 0 0 0
escT 0 0 0 0 0 0 1 0 0 0 0 0 0
escU 0 0 0 0 0 0 1 0 0 0 0 0 0
escV 0 0 0 0 0 0 1 0 0 0 0 0 0
espA 0 0 0 0 0 0 1 0 0 0 0 0 0
espB 0 0 0 0 0 0 1 0 0 0 0 0 0
espD 0 0 0 0 0 0 1 0 0 0 0 0 0
espG 0 0 0 0 0 0 1 0 0 0 0 0 0
espK 0 0 0 0 0 0 1 0 0 0 0 0 0
espL1 0 1 0 0 0 0 1 0 0 0 0 0 0
espL4 0 0 0 0 0 0 1 0 0 0 0 0 0
espR1 0 0 0 0 0 0 1 0 0 0 0 0 0
espR3 0 0 0 0 0 0 1 0 0 0 0 0 0
espR4 0 0 0 0 0 0 1 0 0 0 0 0 0
espX1 0 0 0 0 0 0 1 1 0 0 0 0 0
espX2 0 0 0 0 0 0 1 0 0 0 0 0 0
espX4 0 0 0 0 0 0 1 0 0 0 0 0 0
espX5 0 0 0 0 0 0 1 0 0 0 0 0 0
espY1 0 0 0 0 0 0 1 0 0 0 0 0 0
espY2 0 1 0 0 0 0 1 0 0 0 0 0 0
espY3 0 0 0 0 0 0 1 0 0 0 0 0 0
espY4 0 0 0 0 0 0 1 0 0 0 0 0 0
etgA 0 0 0 0 0 0 1 0 0 0 0 0 0
faeC 0 0 0 0 0 0 0 1 0 0 0 0 0
faeD 0 0 0 0 0 0 0 1 0 0 0 0 0
faeE 0 0 0 0 0 0 0 1 0 0 0 0 0
faeF 0 0 0 0 0 0 0 1 0 0 0 0 0
faeH 0 0 0 0 0 0 0 1 0 0 0 0 0
faeI 0 0 0 0 0 0 0 1 0 0 0 0 0
fdeC 0 1 0 0 0 0 1 0 0 0 0 0 0
fepA 0 1 0 0 0 0 1 0 0 0 0 0 0
fepB 0 1 0 0 0 0 1 0 0 0 0 0 0
fepC 1 1 0 1 0 0 1 0 1 0 1 0 1
fepD 1 1 0 0 1 0 1 0 0 0 1 0 0
fepG 1 1 0 1 0 0 1 0 0 0 1 0 0
fes 0 1 0 0 0 0 1 0 0 0 0 0 0
fimA 0 1 0 0 0 0 1 0 0 0 0 0 0
fimB 0 1 0 0 0 0 1 0 0 0 0 0 0
fimC 0 1 0 0 0 0 1 0 0 0 0 0 0
fimD 0 1 0 1 0 0 1 0 0 0 0 0 1
fimE 1 0 0 0 0 0 1 0 0 0 0 0 0
fimF 0 1 0 0 0 0 1 0 0 0 0 0 0
fimG 0 1 0 0 0 0 1 0 0 0 0 0 0
fimH 0 1 0 0 0 0 1 0 0 0 0 0 0
fimI 0 1 0 0 0 0 1 0 0 0 0 0 0
flgH 0 0 0 0 0 0 1 0 1 0 0 0 0
fliG 1 1 0 1 0 0 1 0 1 0 0 0 1
fliM 1 0 0 0 0 0 1 0 1 0 0 0 0
fliN 0 0 0 0 0 0 1 0 1 0 0 0 0
fliP 1 0 0 0 0 0 0 0 0 0 0 0 0
focA 0 0 0 0 0 0 1 0 0 0 0 0 0
focC 0 0 0 0 0 0 1 0 0 0 0 0 0
focD 0 0 0 0 0 0 1 0 0 0 0 0 0
focF 0 0 0 0 0 0 1 0 0 0 0 0 0
focG 0 0 0 0 0 0 1 0 0 0 0 0 0
focH 0 0 0 0 0 0 1 0 0 0 0 0 0
fyuA 0 1 0 1 1 0 1 0 1 1 1 1 1
gspC 0 1 0 0 0 0 1 0 0 0 0 0 0
gspD 0 1 0 0 0 0 1 0 0 0 0 0 0
gspE 0 1 0 0 0 0 1 0 0 0 0 0 0
gspF 0 1 0 0 0 0 1 0 0 0 0 0 0
gspG 0 1 0 0 0 0 1 0 0 0 0 0 0
gspH 0 1 0 0 0 0 1 0 0 0 0 0 0
gspI 0 1 0 0 0 0 1 0 0 0 0 0 0
gspJ 0 1 0 0 0 0 1 0 0 0 0 0 0
gspK 0 1 0 0 0 0 1 0 0 0 0 0 0
gspL 0 1 0 0 0 0 1 0 0 0 0 0 0
gspM 0 1 0 0 0 0 1 0 0 0 0 0 0
gtrA 0 0 0 0 0 0 1 1 0 0 0 0 0
gtrB 0 0 0 0 0 0 1 1 0 0 0 0 0
hlyA 0 0 0 0 0 0 1 1 0 0 0 0 0
hlyB 0 0 0 0 0 0 1 1 0 0 0 0 0
hlyC 0 0 0 0 0 0 1 1 0 0 0 0 0
hlyD 0 0 0 0 0 0 1 1 0 0 0 0 0
ibeA 0 1 0 0 0 0 1 0 0 0 0 0 0
iroB 0 0 0 0 1 0 1 1 1 0 0 1 0
iroC 0 0 0 0 1 0 1 1 0 0 0 1 0
iroD 0 0 0 0 1 0 1 1 0 0 0 1 0
iroE 0 0 0 0 1 0 1 1 0 0 0 1 0
iroN 0 0 0 0 1 0 1 1 1 0 0 1 0
irp1 0 1 0 1 1 0 1 0 1 1 1 1 1
irp2 0 1 0 1 1 0 1 0 1 1 1 1 1
iucA 0 0 1 1 0 0 1 1 0 0 0 1 1
iucB 0 0 1 1 0 0 1 1 0 0 0 1 1
iucC 0 0 1 1 0 0 1 1 0 0 0 1 1
iucD 0 0 1 0 0 0 1 1 0 0 0 1 0
iutA 0 0 1 1 0 0 1 1 0 0 0 1 1
kpsD 0 1 0 0 0 0 1 0 0 0 0 0 0
kpsM 0 1 0 0 0 0 1 0 0 0 0 0 0
kpsT 0 0 0 0 0 0 1 0 0 0 0 0 0
lpfA 0 0 0 0 1 0 0 0 0 0 0 0 0
lpfB 0 0 0 0 1 0 0 0 0 0 0 0 0
lpfC 0 0 0 0 1 0 0 0 0 0 0 0 0
lpfE 0 0 0 0 1 0 0 0 0 0 0 0 0
map 0 0 0 0 0 0 1 0 0 0 0 0 0
mgtB 0 0 0 0 0 0 0 0 0 1 0 0 0
nleB2 0 0 0 0 0 0 1 0 0 0 0 0 0
ompA 1 1 0 1 1 0 1 0 1 1 1 0 1
paa 0 0 0 0 0 0 1 1 0 0 0 0 0
papA 0 0 0 0 0 0 1 0 0 0 0 0 0
papB 0 0 0 0 0 0 1 1 0 0 0 0 0
papC 0 0 0 0 0 0 1 1 0 0 0 0 0
papD 0 0 0 0 0 0 1 1 0 0 0 0 0
papE 0 0 0 0 0 0 1 0 0 0 0 0 0
papF 0 0 0 0 0 0 1 1 0 0 0 0 0
papG 0 0 0 0 0 0 1 0 0 0 0 0 0
papH 0 0 0 0 0 0 1 1 0 0 0 0 0
papI 0 0 0 0 0 0 1 1 0 0 0 0 0
papJ 0 0 0 0 0 0 1 1 0 0 0 0 0
papK 0 0 0 0 0 0 1 1 0 0 0 0 0
papX 0 0 0 0 0 0 1 1 0 0 0 0 0
pic 0 0 0 0 0 0 1 0 0 0 0 0 0
sat 0 0 0 0 0 0 1 1 0 0 0 0 0
senB 0 0 0 1 0 0 0 1 0 0 0 0 1
sepD 0 0 0 0 0 0 1 0 0 0 0 0 0
sepL 0 0 0 0 0 0 1 0 0 0 0 0 0
sepQ/escQ 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaA 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaB 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaC 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaD 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaE 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaF 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaG 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaH 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaS 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaX 0 0 0 0 0 0 1 0 0 0 0 0 0
sfaY 0 0 0 0 0 0 1 0 0 0 0 0 0
shuA 0 1 0 0 0 0 1 0 0 0 0 0 0
shuS 0 0 0 0 0 0 1 0 0 0 0 0 0
shuT 0 0 0 0 0 0 1 0 0 0 0 0 0
shuX 0 1 0 0 0 0 1 0 0 0 0 0 0
shuY 0 0 0 0 0 0 1 0 0 0 0 0 0
sinH 0 1 0 0 0 0 0 0 0 0 0 0 0
tcpC 0 0 0 0 0 0 1 0 0 0 0 0 0
tviB 0 1 0 0 0 0 0 0 0 0 0 0 0
tviC 0 1 0 0 0 0 0 0 0 0 0 0 0
tviD 0 1 0 0 0 0 0 0 0 0 0 0 0
tviE 0 1 0 0 0 0 0 0 0 0 0 0 0
vat 0 0 0 0 0 0 1 0 0 0 0 0 0
vexA 0 1 0 0 0 0 0 0 0 0 0 0 0
vexB 0 1 0 0 0 0 0 0 0 0 0 0 0
vexC 0 1 0 0 0 0 0 0 0 0 0 0 0
vexD 0 1 0 0 0 0 0 0 0 0 0 0 0
vexE 0 1 0 0 0 0 0 0 0 0 0 0 0
yagV/ecpE 0 0 1 0 0 1 1 0 0 0 1 0 0
yagW/ecpD 0 0 1 0 0 1 1 0 1 0 1 0 0
yagX/ecpC 0 0 1 0 0 1 1 0 0 0 1 0 0
yagY/ecpB 0 0 1 0 0 0 1 0 0 0 1 0 0
yagZ/ecpA 0 0 1 0 0 1 1 0 1 1 1 0 0
ybtA 0 1 0 1 1 0 1 0 1 1 1 1 1
ybtE 0 1 0 1 1 0 1 0 1 1 1 1 1
ybtP 0 1 0 1 1 0 1 0 1 1 1 1 1
ybtQ 0 1 0 1 1 0 1 0 1 1 1 1 1
ybtS 1 1 0 1 1 0 1 0 1 1 1 1 1
ybtT 0 1 0 1 1 0 1 0 1 1 1 1 1
ybtU 0 1 0 1 1 0 1 0 1 1 1 1 1
ybtX 0 1 0 1 1 0 1 0 1 1 1 1 1
ykgK/ecpR 0 0 1 0 0 1 1 0 0 0 1 0 0

VF Genes UpsetR

#----------------Step3: UpsetR Plot VFGenes

# Gene / group pairs that feed the UpSet plot.
abr_upsetR_df <- select(Abricate_vf_df, VF_GENE, Species_ContigClass)
#View(abr_upsetR_df)

# One vector of genes per Species#ContigClass group, then the combination matrix.
abr_upsetR_lt <- split(
  abr_upsetR_df[["VF_GENE"]],
  abr_upsetR_df[["Species_ContigClass"]]
)
abr_comb_mat <- make_comb_mat(abr_upsetR_lt)

# Sizes of the intersections (columns) and of the individual sets (rows).
col_size <- comb_size(abr_comb_mat)
row_size <- set_size(abr_comb_mat)

vf_ht <- UpSet(
  abr_comb_mat,
  row_title = "Species#SeqClassification",
  column_title = "Virulence factor genes Intersection across species",
  pt_size = unit(6, "pt"),
  lwd = unit(2, "pt"),
  # Colouring options that were tried and left switched off:
  #comb_col = "red",
  # bg_col = c("#d6b23b","#81dce2","#acf49a","#a784d8","#acf49a","#81dce2","#1a51dd","#1d7a01","#683cd8","#d62c4c","#683cd8","#1d7a01" ,
  #            "#dd71c0","#1a51dd","#dd71c0","#d62c4c"),
  #bg_col = upset_row_col$Color,
  top_annotation = upset_top_annotation(
    abr_comb_mat,
    gp = gpar(fill = "#009797"),
    add_numbers = FALSE,
    bar_width = 0.5,
    annotation_name_rot = 90
  ),
  right_annotation = upset_right_annotation(
    abr_comb_mat,
    add_numbers = TRUE,
    gp = gpar(fill = "#009797")
  ),
  show_row_names = TRUE,
  row_names_gp = gpar(fontsize = 9), # font size of the set (row) labels
  width = unit(900, units = "pt"),
  height = unit(12, "cm")
)

# Draw the plot, then read back the column/row order of the drawn object.
vf_ht <- draw(vf_ht)
col_od <- column_order(vf_ht)
row_od <- row_order(vf_ht)

# The top annotation is built with add_numbers = FALSE, so write each
# intersection size just above its bar by hand.
decorate_annotation("intersection_size", {
  grid.text(
    col_size[col_od],
    x = seq_along(col_size),
    y = unit(col_size[col_od], "native") + unit(2, "mm"),
    default.units = "native",
    just = "bottom",
    gp = gpar(fontsize = 8)
  )
})

Table4

VF Genes proportions in Chromosome and plasmids by Species

# Per species, count the distinct VF genes that occur only on chromosomal
# contigs, only on plasmid contigs, or on both, then express the three counts
# as row percentages.
vf_geneCounts <- Abricate_vf_df %>%
  separate(Species_ContigClass, sep = "#", into = c("Species", "ContigClass")) %>%
  # One row per (species, gene, contig class) so the cross-table below is 0/1.
  distinct(Species, VF_GENE, ContigClass) %>%
  transmute(Species_VF_GENE = paste(Species, VF_GENE, sep = "#"), ContigClass) %>%
  # Rows: Species#gene; columns: the contig classes (Chromosome, Plasmid).
  table() %>%
  as.data.frame.matrix() %>%
  mutate(BothChr_AND_Plasmid = as.numeric(Chromosome == 1 & Plasmid == 1)) %>%
  tibble::rownames_to_column("Species_VF_GENE") %>%
  separate(Species_VF_GENE, sep = "#", into = c("Species", "VFGene")) %>%
  group_by(Species) %>%
  # Genes present in both locations are subtracted from the single-location
  # totals so the three categories are mutually exclusive.
  summarise(
    Count_VFgenes_in_Chr = sum(Chromosome) - sum(BothChr_AND_Plasmid),
    Count_VFgenes_in_Plasmid = sum(Plasmid) - sum(BothChr_AND_Plasmid),
    Count_VFgenes_in_Chr_Plasmid = sum(BothChr_AND_Plasmid)
  ) %>%
  column_to_rownames(var = "Species")

# Each cell divided by its row total (data frame / vector recycles down the
# columns, so element i of rowSums() lines up with row i).
vf_geneCounts_props <- vf_geneCounts * 100 / rowSums(vf_geneCounts)

vf_geneCounts_props %>%
  kbl(caption = "VF Genes proportions in Chromsome and plasmids by Species") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
VF Genes proportions in Chromsome and plasmids by Species
Count_VFgenes_in_Chr Count_VFgenes_in_Plasmid Count_VFgenes_in_Chr_Plasmid
100.00000 0.000000 0.00000
cfreundii 87.77778 12.222222 0.00000
cronobacter 100.00000 0.000000 0.00000
ecloacae 81.25000 18.750000 0.00000
ecoli 80.71066 4.060914 15.22843
kaerogenes 100.00000 0.000000 0.00000
koxytoca 100.00000 0.000000 0.00000
kpneumoniae 37.14286 28.571429 34.28571
senterica 100.00000 0.000000 0.00000

Plot2

VF Genes Percentages in Chromosome and plasmids by Species

# Horizontal stacked bar chart: for every species, the percentage of VF genes
# found only on chromosomes, only on plasmids, or on both.
vf_geneCounts_props %>% 
  rownames_to_column(var = "Species") %>% 
  # `variable.name` is spelled out in full (the previous `variable =` only
  # worked through partial argument matching); `id.vars` is given explicitly so
  # melt() does not have to guess the id column.
  reshape2::melt(id.vars = "Species", variable.name = "VF Gene location", value.name = "Percentage") %>% 
  # Turn 0% segments into NA so they get no bar segment and no "0.0%" label.
  # na_if() is applied to the column: since dplyr 1.1.0 it no longer accepts a
  # whole data frame with a scalar replacement value.
  mutate(Percentage = na_if(Percentage, 0)) %>% 
  ggplot(aes(x = Species, y = Percentage, fill = `VF Gene location`)) + 
  geom_bar(width = 0.4, position = position_stack(), stat = "identity") +
  scale_fill_manual(labels = c('Chromosome', 'Plasmid', "Both"), values = c("#fcb6b1", "#facc96", "#4dbfbc")) +
  # Percentage label centred inside each stacked segment.
  geom_text(aes(label = paste0(sprintf("%1.1f", Percentage), "%")), position = position_stack(vjust = 0.5)) +
  scale_x_discrete(expand = c(0, 0)) +
  ylab('Percentage') +
  xlab('Species') +
  ggtitle('Percentages of Virulence factor genes by their location') +
  theme_classic() +
  theme(axis.text.y = element_text(size = 16, vjust = 0, hjust = 1),
        axis.text.x = element_text(size = 16, angle = 90, vjust = 0, hjust = 1.2),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "top") + 
  coord_flip()

Insertion Sequence (IS)

Table1

Percentages of Samples carrying Insertion Sequences in Chromosomes and Plasmids across Species

###@@@@@@@@@@@@@@ Dataframe6 - Prokka Insertion Sequences 

# Load the Prokka/ISfinder hits: one row per annotated IS element with the
# assembly file, contig, "file#contig" key and IS name.
# The shell pipeline that writes this file (grep 'ISfinder' | sed | awk print)
# emits data rows only, so the file is read with header = FALSE; reading it
# with header = TRUE would silently consume the first IS record as column
# names. NOTE(review): if a header line was ever added by hand it is now read
# as a data row, but it is removed again by the assembly filter below.
Prokka_IS_df <- data.table::fread(
  file = "6_Prokka_InsertionSequences/CPE_Trans_Prokka_IS.tab",
  sep = "\t",
  header = FALSE,
  col.names = c("Fasta", "Contig", "Fasta_Contig", "IS")
)
#head(Prokka_IS_df)

#----------------Step2: Combining plasClass dataframe with Prokka_IS dataframe for plotting Insertion Sequences

# Attach the plasClass classification to every IS hit via the "file#contig"
# key. IS hits on contigs absent from plasClass_Prob_df keep NA in
# Classification / Species_ContigClass (left join).
Prokka_IS_plasClass_df <- left_join(Prokka_IS_df, plasClass_Prob_df, by = c("Fasta_Contig")) %>% 
  select(Fasta.x,Contig.x,Fasta_Contig,IS,Classification,Species_ContigClass)

#head(Prokka_IS_plasClass_df)

# Keep only the assemblies listed in hybridAssemb_1198list.
Prokka_IS_plasClass_df <- Prokka_IS_plasClass_df %>% filter(Fasta.x %in% hybridAssemb_1198list) # Prokka_IS_plasClass_df now has only 1198 assembly information

# Prokka_IS_plasClass_df %>%
#   select(Transposon,Species_ContigClass) %>%
#   table() %>% # frequency of unique transposons
#   kbl(caption = "Transposons and their counts in Chromosomes and Plasmids across Species") %>%
#   kable_classic(full_width = F, html_font = "Cambria")

# End colours of the gradient used to shade the percentage cells.
customGreen0 <- "#DeF7E9"
customRed <- "#ff7f7f"

# Number of Species#ContigClass columns in the wide table built below
# (kept as a list with element `n`, as before).
# NA classes (e.g. IS hits whose contig had no plasClass match) are dropped
# here because the table pipeline drops them as well; counting them made
# `align` and `area(col = ...)` refer to one column more than the table has.
Prokka_IS_plasClass_df_total_col <- Prokka_IS_plasClass_df %>% 
  select(Fasta.x,IS,Species_ContigClass) %>% 
  unique() %>%
  select(Species_ContigClass) %>% 
  drop_na(Species_ContigClass) %>% 
  unique() %>% 
  count() %>% c()


# percentages of IS (https://clarewest.github.io/blog/post/making-tables-shiny/)
# For every IS and Species#ContigClass: Freq = number of distinct assemblies
# carrying that IS in that class, divided by the species' SampleCount taken from
# Species_Counts (presumably the number of samples per species; defined
# elsewhere in the file) and shown as a shaded, paginated table.
Prokka_IS_plasClass_df %>%
  select(Fasta.x,IS,Species_ContigClass) %>%
  unique() %>%
  select(IS,Species_ContigClass) %>%
  count(IS, Species_ContigClass) %>%
  rename(Freq = n) %>%
  drop_na(Species_ContigClass) %>%
  mutate(Species_ContigClass2=Species_ContigClass) %>%
  separate(Species_ContigClass2, sep = "#", into = c("Species", "ContigClass")) %>%
  left_join(Species_Counts, by = c("Species")) %>%
  mutate(SamplePrcnt_wGeneElement=Freq*100/SampleCount) %>%
  select(IS,Species_ContigClass,SamplePrcnt_wGeneElement) %>%
  # Long -> wide: one row per IS, one column per Species#ContigClass.
  data.table::dcast(IS~Species_ContigClass, value.var="SamplePrcnt_wGeneElement") %>%
  # IS/class combinations that never occur are shown as 0%.
  replace(is.na(.), 0) %>%
  mutate_if(is.numeric, round, digits=1) %>% 
  # Column 1 is the IS name (left aligned); columns 2..n+1 hold the percentages.
  formattable(., align = c("l",rep("c", Prokka_IS_plasClass_df_total_col$n)), 
              list(
                `IS` = formatter("span", style = ~ style(color = "grey",font.weight = "bold")), 
                area(col = seq_len(Prokka_IS_plasClass_df_total_col$n) + 1L) ~ color_tile(customGreen0, customRed))) %>% 
  as.datatable(escape = FALSE,
               options = list(scrollX = FALSE, dom = 'lfti',
                              lengthMenu = list(c(5, 10, 25, 100, -1), c('5','10','25', '100', 'All')),
                              pageLength = 5),
               rownames = FALSE)

Table2

Insertion Sequences and their unique counts in Chromosomes and Plasmids across Species

# Presence/absence table: one row per insertion sequence, one column per
# Species#ContigClass, with 1 where the IS occurs at least once in that class.
Prokka_IS_plasClass_df %>%
  distinct(IS, Species_ContigClass) %>% # each IS/class pair counted once
  table() %>%
  kbl(caption = "Insertion Sequence and their unique counts in Chromsomes and Plasmids across Species") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
Insertion Sequence and their unique counts in Chromsomes and Plasmids across Species
-#Chromosome -#Plasmid cfreundii#Chromosome cfreundii#Plasmid cronobacter#Chromosome cronobacter#Plasmid ecloacae#Chromosome ecloacae#Plasmid ecoli#Chromosome ecoli#Plasmid kaerogenes#Chromosome kaerogenes#Plasmid koxytoca#Chromosome koxytoca#Plasmid kpneumoniae#Chromosome kpneumoniae#Plasmid senterica#Chromosome senterica#Plasmid
IS100 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0
IS1006 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0
IS100kyp 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0
IS10A 1 0 0 1 0 1 0 1 0 1 0 0 0 0 0 1 0 0
IS10R 1 1 0 1 0 0 1 1 1 1 1 1 0 0 1 1 0 0
IS1133 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0 0
IS1203 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0
IS1222 0 0 0 1 0 0 1 1 0 0 0 1 0 0 0 0 0 0
IS1230B 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0
IS1247 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
IS1271 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
IS1294 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
IS1326 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0
IS1351 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
IS1353 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
IS1394 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
IS1396 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0
IS1397 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0
IS1400 1 0 1 0 1 0 1 0 0 0 1 1 0 0 1 1 0 0
IS1414 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0
IS1489v1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
IS1491 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
IS15 0 0 0 1 0 0 0 1 1 1 0 1 0 1 1 1 0 0
IS150 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
IS1541B 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
IS15DI 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 0
IS15DII 0 0 1 1 0 0 1 1 1 1 0 0 0 0 1 1 0 0
IS15DIV 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 0
IS1618 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
IS186A 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
IS186B 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
IS1A 0 1 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0 0
IS1B 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 0
IS1D 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0
IS1F 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
IS1G 0 0 1 1 0 0 1 1 1 1 0 0 0 0 1 1 0 0
IS1H 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
IS1N 0 0 1 0 0 0 1 1 1 1 1 0 0 1 1 1 0 0
IS1S 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 1 0 0
IS1SD 0 0 1 1 0 0 1 0 1 1 0 1 0 1 1 1 0 0
IS1X1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0
IS1X2 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0
IS1X3 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 0 0
IS1X4 1 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1 0 0
IS2 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 1 0 0
IS200C 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
IS200F 0 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0
IS21 0 0 1 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0
IS231L 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 0
IS285 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0
IS3 0 0 1 1 0 0 1 1 1 1 0 0 0 0 0 1 0 0
IS3000 0 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 0
IS30D 0 0 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0
IS30H 1 0 1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0
IS3411 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
IS3F 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
IS3H 0 0 1 1 0 0 1 1 1 1 0 0 0 0 1 1 0 0
IS4 0 0 1 1 0 0 1 0 1 1 0 0 1 0 0 0 0 0
IS406 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0
IS421 0 0 0 1 0 0 0 1 1 1 0 0 0 0 1 1 0 0
IS4321 0 1 1 1 0 0 1 1 0 1 0 0 0 0 1 1 0 0
IS4321R 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0
IS4811 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0
IS5075 0 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0
IS50R 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0
IS5564 1 0 1 0 0 0 1 0 1 0 1 0 1 0 1 1 0 0
IS5708 1 1 1 1 0 0 1 1 1 1 0 0 0 1 0 0 0 0
IS5B 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0
IS5D 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0
IS606 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
IS609 1 0 1 1 0 0 1 0 1 1 0 0 0 0 1 1 0 0
IS6100 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 0 0
IS621 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 0 0
IS629 1 0 0 1 0 0 0 1 1 1 0 0 0 0 1 1 0 0
IS630 0 0 1 0 0 0 1 0 1 1 1 1 0 0 1 0 0 0
IS679 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0
IS682 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0
IS884 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
IS903 0 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0
IS903B 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 0
IS91 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
IS911 0 0 1 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0
ISAav1 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 1 0 0
ISAba1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
ISAba125 0 1 0 1 0 1 1 1 1 1 0 1 0 1 0 1 0 1
ISAba14 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0 0
ISAba33 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISAba42 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
ISAba43 1 1 1 0 0 0 0 1 1 1 0 1 0 0 1 1 0 0
ISAba8 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISAbe14 1 0 1 0 0 0 1 1 1 1 0 0 0 0 1 1 0 0
ISAcba1 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 1 0 0
ISAeca1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
ISAeme15 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0
ISAeme19 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
ISAeme9 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISAfe13 0 1 1 1 0 1 1 1 0 1 0 1 0 1 1 1 0 1
ISAlde2 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0
ISAlw16 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
ISApl1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISApu1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0
ISApu2 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0
ISArsp9 1 1 1 1 0 0 1 1 1 1 0 0 0 1 1 1 0 0
ISAs17 0 0 1 1 0 0 0 1 0 1 1 1 0 0 1 1 0 0
ISAs19 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
ISAs25 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
ISAs29 0 0 1 1 0 0 1 1 0 0 0 0 0 0 1 1 0 0
ISAs9 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISAzs36 0 0 1 1 0 0 1 1 0 1 0 1 0 0 0 1 0 0
ISBcen15 0 0 1 0 0 0 1 0 1 0 0 1 0 0 1 1 0 0
ISBdi12 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
ISBli29 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 0
ISBli6 1 0 1 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0
ISBmo1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0
ISBmu20 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0
ISBos1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISBrsa1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISBsp7 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0
ISBth5 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISCARN93 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISCco2 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0 0
ISCco3 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
ISCep1 1 0 1 0 1 0 1 0 1 1 1 0 1 0 1 0 1 0
ISCfr1 1 1 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0 0
ISCfr12 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0
ISCfr13 0 0 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0
ISCfr14 0 0 1 1 0 0 1 1 0 1 0 1 0 0 1 1 0 0
ISCfr26 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0 1 0 0
ISCfr3 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 1 0 0
ISCfr4 1 0 1 1 0 0 0 0 1 0 1 1 0 0 1 1 0 0
ISCfr6 0 1 0 0 0 0 1 1 1 1 1 0 0 0 1 1 0 0
ISCfr8 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0
ISCgl1 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0
ISCot1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0
ISCro1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0
ISCro2 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISCro3 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISCro4 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
ISCro5 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
ISCro6 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISCte8 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0
ISDsh3 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0
ISEae1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0
ISEal1 1 0 1 1 0 0 1 1 0 0 0 1 1 1 1 1 0 0
ISEam1 1 0 1 1 0 0 1 1 1 0 1 0 0 0 1 1 0 0
ISEas1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISEc1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc10 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 1 0 0
ISEc11 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
ISEc12 1 0 1 1 0 0 1 0 1 1 0 0 0 0 1 1 0 0
ISEc13 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0
ISEc14 1 0 1 1 0 0 1 1 1 1 0 1 0 0 1 1 0 0
ISEc15 0 1 1 1 0 0 1 1 0 1 0 0 1 0 1 1 0 0
ISEc16 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc17 1 0 1 1 0 0 1 1 1 1 0 0 0 1 1 1 0 0
ISEc18 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc19 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISEc20 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISEc21 0 0 1 1 0 0 1 0 1 1 0 0 0 1 1 1 0 0
ISEc22 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0
ISEc23 1 1 0 1 0 0 1 1 1 1 0 0 0 0 1 1 0 0
ISEc24 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0
ISEc25 1 0 1 1 0 0 1 1 0 1 0 0 0 0 1 1 0 0
ISEc26 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc27 0 0 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0
ISEc29 0 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 0
ISEc30 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISEc31 0 0 1 0 0 0 1 0 1 1 1 0 0 0 1 1 0 0
ISEc32 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0
ISEc33 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1
ISEc35 0 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 0
ISEc36 0 0 1 1 0 0 1 1 1 1 0 1 1 0 1 1 0 0
ISEc37 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0
ISEc38 1 0 1 1 0 0 0 1 1 1 0 1 0 1 1 1 0 0
ISEc39 1 0 1 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0
ISEc40 0 1 1 1 0 0 0 0 1 0 0 0 1 1 1 1 0 0
ISEc41 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0
ISEc42 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0
ISEc43 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc44 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc45 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc46 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0
ISEc47 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0
ISEc48 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISEc49 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISEc5 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISEc52 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 0
ISEc53 1 0 1 0 0 0 1 0 1 1 1 0 0 0 1 0 0 0
ISEc59 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0
ISEc60 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0
ISEc62 0 0 1 1 0 0 1 1 1 1 0 1 0 0 0 1 0 0
ISEc63 0 1 1 1 0 0 1 1 1 1 0 0 0 1 0 1 0 0
ISEc66 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc68 0 1 1 0 0 0 1 1 1 1 0 0 0 0 1 1 0 0
ISEc76 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc77 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0
ISEc78 1 0 1 1 0 0 1 1 1 1 0 0 0 0 0 1 0 0
ISEc8 0 0 1 1 0 0 1 1 1 1 0 1 0 0 1 1 0 0
ISEc81 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0
ISEc83 1 0 1 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0
ISEc84 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
ISEc86 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEc9 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 0
ISEcB1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
ISEch11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISEch12 1 0 1 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0
ISEch14 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISEch2 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISEch6 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISEcl1 0 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0
ISEcl10 1 0 1 0 1 0 1 0 1 1 0 0 0 0 1 0 0 0
ISEcl11 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISEcl3 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISEcl6 0 0 1 0 1 0 1 1 1 0 0 1 1 1 1 1 0 0
ISEhe2 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0
ISEhe3 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0
ISEhe4 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0
ISEic2 0 0 1 0 1 0 0 1 1 1 0 0 0 0 1 1 0 0
ISEisp1 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0
ISErsp1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0
ISEsa1 0 1 1 1 0 0 1 1 1 0 0 0 0 1 1 1 0 0
ISEsa2 0 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0
ISEsp1 0 0 1 1 1 0 1 1 1 1 0 0 1 1 1 1 0 0
ISGme8 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISHce1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISHgi14 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISHne2 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0
ISHp1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISIde1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISKox1 0 0 0 1 0 0 1 1 1 0 1 0 0 0 1 1 0 0
ISKox2 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISKox3 1 1 1 1 0 0 1 1 1 1 1 0 0 0 1 1 0 0
ISKpn1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0
ISKpn11 0 1 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0 0
ISKpn12 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0
ISKpn14 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 0
ISKpn15 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISKpn18 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0
ISKpn19 0 1 1 1 0 0 0 1 1 1 0 1 0 0 1 1 0 0
ISKpn2 0 0 1 0 0 0 1 0 1 0 0 0 1 0 1 1 0 0
ISKpn20 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0
ISKpn21 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0
ISKpn24 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 0
ISKpn25 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 0
ISKpn26 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 0
ISKpn27 0 1 1 1 0 1 0 1 1 1 0 1 0 1 1 1 0 0
ISKpn28 0 0 0 1 0 0 0 1 1 1 0 1 1 1 1 1 0 0
ISKpn3 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISKpn31 1 1 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0 0
ISKpn33 0 0 1 0 0 0 1 1 0 0 0 0 1 1 1 1 0 0
ISKpn34 1 0 1 1 0 0 1 1 0 1 0 0 0 0 1 1 0 0
ISKpn37 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISKpn38 0 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0
ISKpn4 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0
ISKpn40 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISKpn41 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
ISKpn42 0 0 1 0 0 0 0 1 1 1 0 0 0 0 1 1 0 0
ISKpn43 0 1 1 1 0 0 0 1 0 1 0 0 0 1 1 1 0 0
ISKpn47 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0
ISKpn49 1 0 1 1 0 0 1 1 1 0 0 0 0 1 1 1 0 0
ISKpn50 0 0 1 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0
ISKpn53 1 0 1 0 1 0 1 0 1 1 1 1 0 0 1 1 0 0
ISKpn54 1 1 1 0 0 0 1 0 1 0 1 1 1 0 1 1 0 0
ISKpn6 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0
ISKpn60 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0
ISKpn66 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISKpn7 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISKpn8 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0
ISKqu3 0 1 0 0 0 0 0 1 1 1 0 1 0 1 1 1 0 0
ISLad1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 0
ISLad2 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0
ISLad6 0 0 1 1 0 0 1 1 1 1 0 0 1 1 0 1 0 0
ISLead2 0 0 1 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0
ISLead3 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISLpn8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISMaq2 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0
ISMasp6 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISMph1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0
ISMtsp21 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0
ISNisp5 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0
ISNpu13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISPa16 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0
ISPa21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISPa31 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISPa38 0 1 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0 0
ISPa7 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
ISPa77 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISPa80 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
ISPa83 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISPa85 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISPa95 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 0 0
ISPa97 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0
ISPa99 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0
ISPan1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0
ISPcc1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISPcc2 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
ISPcc4 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISPcc6 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0
ISPeat2 1 0 0 1 0 0 1 1 0 0 0 0 0 0 0 1 0 0
ISPlge2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
ISPlge3 1 1 0 1 0 0 0 1 0 0 0 0 0 0 1 1 0 0
ISPlge4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISPlu15 1 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0
ISPlu22 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0
ISPlu8 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISPmi3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISPpr10 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISPps1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
ISPpu1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISPpu12 0 0 1 1 0 0 1 0 1 1 0 0 0 0 0 1 0 0
ISPpu22 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISPpu23 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISPpu30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISPre2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISPrre1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0
ISPrre10 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0
ISPrst3 1 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISPst2 0 0 1 1 0 0 0 1 1 1 0 0 0 0 1 1 0 0
ISPst3 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
ISPst4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISPst8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISPst9 0 0 1 1 0 0 1 0 1 0 0 0 0 0 0 1 0 0
ISPsy30 0 0 0 0 0 0 1 1 1 1 0 0 1 0 0 1 0 0
ISPsy42 0 1 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 0
ISPsy43 0 0 0 1 0 0 1 1 0 1 0 1 0 1 1 1 0 0
ISPye16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
ISRaq1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 0 0
ISRhba1 0 0 1 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0
ISRop1 0 0 0 0 0 0 0 1 1 1 0 0 0 1 1 1 0 0
ISRor2 1 0 1 1 1 0 0 1 1 0 1 0 1 1 1 1 1 0
ISRor3 1 0 1 1 1 0 0 1 1 0 1 1 1 1 1 1 0 0
ISRosp3 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0
ISRso16 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0
ISRso21 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 0 0
ISSaen1 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 1 0 0
ISSav7 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
ISSba1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISSba14 0 1 1 0 0 0 1 1 1 1 0 1 0 0 0 1 0 0
ISSba3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISSba8 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISSbo1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0
ISSd1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISSde4 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0
ISSde5 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISSen1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISSen10 0 0 1 0 0 0 1 0 0 1 0 0 1 1 1 0 0 0
ISSen13 0 0 1 0 0 0 0 1 1 1 0 0 1 0 1 1 0 0
ISSen3 0 0 1 1 0 0 1 1 1 1 0 0 0 0 1 1 0 0
ISSen4 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1
ISSen6 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0
ISSen7 0 0 1 1 0 0 0 1 1 0 0 0 1 1 1 1 0 0
ISSen8 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0
ISSen9 0 0 0 1 0 0 1 1 0 1 0 0 0 1 1 1 0 0
ISSfl1 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0
ISSfl10 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISSfl3 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISSfl8 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISSfl9 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0
ISSgsp1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 1 1 0 0
ISShdy1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
ISShdy2 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISShes11 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0
ISShes4 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
ISShfr8 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISShfr9 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0
ISShwo2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
ISSlo2 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISSm1 0 0 1 1 0 0 1 1 0 1 0 0 0 1 0 1 0 0
ISSm3 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0
ISSm4 1 0 1 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0
ISSod16 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISSod23 0 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0
ISSod4 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0
ISSod6 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISSoEn2 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0
ISSoEn3 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0
ISSpe2 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISSpr1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISSpr2 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISSpu11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISSpu2 0 0 0 1 0 0 1 1 1 1 0 0 0 0 1 1 0 0
ISSpu20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
ISSpu6 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0
ISSpu7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISSso4 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0
ISSsu9 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0
ISStma11 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0
ISSty2 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1 1 0 0
ISSysp7 1 1 1 0 0 0 1 1 1 0 1 0 1 0 0 1 0 0
ISTfu1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISThi1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 0
ISThsp9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISUnCu16 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0
ISVch3 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0
ISVch4 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
ISVch9 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
ISVsa17 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0
ISVsa3 0 0 0 1 0 0 0 1 1 1 0 0 0 0 1 1 0 0
ISWpi13 0 0 1 1 0 0 1 1 1 1 0 0 0 1 0 1 0 0
ISXc4 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 0
ISXne1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISYal1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISYen3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
ISYpe1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ISYps1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0
ISYps3 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0
ISYps8 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
ISYru1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0
Tn2 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 0
Tn3 0 1 0 1 0 0 0 1 1 1 0 0 0 0 1 1 0 0
Tn4430 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Tn5393 0 1 0 1 0 1 1 1 1 1 0 0 0 1 1 1 0 0
Tn5403 0 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1
TnAs1 1 1 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0 0
TnAs2 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1 0 0
TnAs3 1 1 1 1 0 0 1 1 1 1 0 0 0 0 1 1 0 0
TnEc1 1 0 0 1 0 0 1 1 1 1 1 1 0 1 1 1 0 0
TnShfr1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 0
TnXax1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Insertion Sequences UpsetR

#----------------Step3: UpsetR Plot Insertion Sequences

# Build one set per Species#ContigClass value; each set holds the insertion
# sequences observed for that value.
IS_upsetR_df <- select(Prokka_IS_plasClass_df, IS, Species_ContigClass)
#View(IS_upsetR_df)

IS_upsetR_lt <- with(IS_upsetR_df, split(IS, Species_ContigClass))
IS_comb_mat <- make_comb_mat(IS_upsetR_lt)

# Sizes of every intersection (columns) and of every set (rows).
col_size <- comb_size(IS_comb_mat)
row_size <- set_size(IS_comb_mat)

IS_ht <- UpSet(
  IS_comb_mat,
  row_title = "Species#SeqClassification",
  column_title = "Insertion Sequences Intersection across species",
  pt_size = unit(6, "pt"),
  lwd = unit(2, "pt"),
  #comb_col = "red",
  # bg_col = c("#d6b23b","#81dce2","#acf49a","#a784d8","#acf49a","#81dce2","#1a51dd","#1d7a01","#683cd8","#d62c4c","#683cd8","#1d7a01" ,
  #            "#dd71c0","#1a51dd","#dd71c0","#d62c4c"),
  #bg_col = upset_row_col$Color,
  top_annotation = upset_top_annotation(
    IS_comb_mat,
    gp = gpar(fill = "#009797"),
    add_numbers = FALSE, # numbers are drawn manually below
    bar_width = 0.5,
    annotation_name_rot = 90
  ),
  right_annotation = upset_right_annotation(
    IS_comb_mat,
    add_numbers = TRUE,
    gp = gpar(fill = "#009797")
  ),
  show_row_names = TRUE,
  row_names_gp = gpar(fontsize = 9), # font size of the set (row) labels
  width = unit(900, units = "pt"),
  height = unit(12, "cm")
)

# draw() renders the plot and returns the drawn object, from which the final
# column/row ordering can be read.
IS_ht <- draw(IS_ht)
#ht
col_od <- column_order(IS_ht)
row_od <- row_order(IS_ht)

# Write each intersection size just above its bar, in drawn column order.
decorate_annotation("intersection_size", {
  grid.text(
    label = col_size[col_od],
    x = seq_along(col_size),
    y = unit(col_size[col_od], "native") + unit(2, "mm"),
    default.units = "native",
    just = "bottom",
    gp = gpar(fontsize = 8)
  )
})

Table4

Insertion Sequences proportions in Chromosome and plasmids by Species

# Per species, count the distinct insertion sequences that occur only on
# chromosomal contigs, only on plasmid contigs, or on both, then express the
# three counts as row percentages.
Prokka_ISCounts <- Prokka_IS_plasClass_df %>%
  separate(Species_ContigClass, sep = "#", into = c("Species", "ContigClass")) %>%
  # One row per (species, IS, contig class) so the cross-table below is 0/1.
  distinct(Species, IS, ContigClass) %>%
  transmute(Species_IS = paste(Species, IS, sep = "#"), ContigClass) %>%
  # Rows: Species#IS; columns: the contig classes (Chromosome, Plasmid).
  table() %>%
  as.data.frame.matrix() %>%
  mutate(BothChr_AND_Plasmid = as.numeric(Chromosome == 1 & Plasmid == 1)) %>%
  tibble::rownames_to_column("Species_IS") %>%
  separate(Species_IS, sep = "#", into = c("Species", "IS")) %>%
  group_by(Species) %>%
  # IS present in both locations are subtracted from the single-location
  # totals so the three categories are mutually exclusive.
  summarise(
    Count_IS_in_Chr = sum(Chromosome) - sum(BothChr_AND_Plasmid),
    Count_IS_in_Plasmid = sum(Plasmid) - sum(BothChr_AND_Plasmid),
    Count_IS_in_Chr_Plasmid = sum(BothChr_AND_Plasmid)
  ) %>%
  column_to_rownames(var = "Species")

# Each cell divided by its row total (data frame / vector recycles down the
# columns, so element i of rowSums() lines up with row i).
Prokka_ISCounts_props <- Prokka_ISCounts * 100 / rowSums(Prokka_ISCounts)

Prokka_ISCounts_props %>%
  kbl(caption = "Insertion Sequences proportions in Chromsome and plasmids by Species") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
Insertion Sequences proportions in Chromsome and plasmids by Species
Count_IS_in_Chr Count_IS_in_Plasmid Count_IS_in_Chr_Plasmid
40.64516 29.67742 29.677419
cfreundii 32.28700 17.48879 50.224215
cronobacter 40.90909 56.81818 2.272727
ecloacae 28.51064 20.42553 51.063830
ecoli 24.00000 16.00000 60.000000
kaerogenes 22.77228 52.47525 24.752475
koxytoca 27.19298 42.98246 29.824561
kpneumoniae 12.72085 30.03534 57.243816
senterica 58.33333 41.66667 0.000000

Plot2

Insertion Sequences Percentages in Chromosome and plasmids by Species

# Horizontal stacked bar chart: for every species, the percentage of insertion
# sequences found only on chromosomes, only on plasmids, or on both.
Prokka_ISCounts_props %>% 
  rownames_to_column(var = "Species") %>% 
  # `variable.name` is spelled out in full (the previous `variable =` only
  # worked through partial argument matching); `id.vars` is given explicitly so
  # melt() does not have to guess the id column.
  reshape2::melt(id.vars = "Species", variable.name = "Insertion Sequences location", value.name = "Percentage") %>% 
  # Turn 0% segments into NA so they get no bar segment and no "0.0%" label.
  # na_if() is applied to the column: since dplyr 1.1.0 it no longer accepts a
  # whole data frame with a scalar replacement value.
  mutate(Percentage = na_if(Percentage, 0)) %>% 
  ggplot(aes(x = Species, y = Percentage, fill = `Insertion Sequences location`)) + 
  geom_bar(width = 0.4, position = position_stack(), stat = "identity") +
  scale_fill_manual(labels = c('Chromosome', 'Plasmid', "Both"), values = c("#fcb6b1", "#facc96", "#4dbfbc")) +
  # Percentage label centred inside each stacked segment.
  geom_text(aes(label = paste0(sprintf("%1.1f", Percentage), "%")), position = position_stack(vjust = 0.5)) +
  scale_x_discrete(expand = c(0, 0)) +
  ylab('Percentage') +
  xlab('Species') +
  ggtitle('Percentages of Insertion Sequences by their location') +
  theme_classic() +
  theme(axis.text.y = element_text(size = 16, vjust = 0, hjust = 1),
        axis.text.x = element_text(size = 16, angle = 90, vjust = 0, hjust = 1.2),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "top") + 
  coord_flip()